[llvm] [Transforms] Recognize memcmp-like loops in LoopIdiomRecognize (PR #181562)

Sayan Sivakumaran via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 23 20:03:45 PDT 2026


https://github.com/sivakusayan updated https://github.com/llvm/llvm-project/pull/181562

>From af457567bac58c250deda15d220e691f05e9366a Mon Sep 17 00:00:00 2001
From: Sayan Sivakumaran <sivakusayan at gmail.com>
Date: Tue, 20 Jan 2026 17:37:02 -0600
Subject: [PATCH] Prototype for recognizing memcmp idiom

---
 .../Transforms/Scalar/LoopIdiomRecognize.h    |   3 +
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 251 +++++++++++-
 llvm/test/Transforms/LoopIdiom/memcmp.ll      | 378 ++++++++++++++++++
 3 files changed, 620 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/memcmp.ll

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 109b4520878cb..ddd190bb68c2f 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -43,6 +43,9 @@ struct DisableLIRP {
 
   /// When true, HashRecognize is disabled.
   static bool HashRecognize;
+
+  /// When true, Memcmp is disabled.
+  static bool Memcmp;
 };
 
 /// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 105757140c758..c62ade5cb0bc2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -20,8 +20,6 @@
 //
 // TODO List:
 //
-// Future loop memory idioms to recognize: memcmp, etc.
-//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
@@ -40,6 +38,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/HashRecognize.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -53,6 +52,7 @@
 #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -100,6 +100,7 @@ using namespace SCEVPatternMatch;
 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
 STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
+STATISTIC(NumMemCmp, "Number of uncountable loops recognized as memcmp idiom");
 STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
 STATISTIC(
     NumShiftUntilBitTest,
@@ -131,6 +132,14 @@ static cl::opt<bool, true>
                       cl::location(DisableLIRP::Memcpy), cl::init(false),
                       cl::ReallyHidden);
 
+bool DisableLIRP::Memcmp;
+static cl::opt<bool, true>
+    DisableLIRPMemcmp("disable-loop-idiom-memcmp",
+                      cl::desc("Proceed with loop idiom recognize pass, but do "
+                               "not convert loop(s) to memcmp."),
+                      cl::location(DisableLIRP::Memcmp), cl::init(false),
+                      cl::ReallyHidden);
+
 bool DisableLIRP::Strlen;
 static cl::opt<bool, true>
     DisableLIRPStrlen("disable-loop-idiom-strlen",
@@ -182,17 +191,17 @@ class LoopIdiomRecognize {
   const TargetTransformInfo *TTI;
   const DataLayout *DL;
   OptimizationRemarkEmitter &ORE;
+  AssumptionCache *AC;
   bool ApplyCodeSizeHeuristics;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
 
 public:
-  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
-                              LoopInfo *LI, ScalarEvolution *SE,
-                              TargetLibraryInfo *TLI,
-                              const TargetTransformInfo *TTI, MemorySSA *MSSA,
-                              const DataLayout *DL,
-                              OptimizationRemarkEmitter &ORE)
-      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
+  explicit LoopIdiomRecognize(
+      AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+      TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, MemorySSA *MSSA,
+      const DataLayout *DL, OptimizationRemarkEmitter &ORE, AssumptionCache *AC)
+      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE),
+        AC(AC) {
     if (MSSA)
       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
   }
@@ -286,6 +295,7 @@ class LoopIdiomRecognize {
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
   bool recognizeAndInsertStrLen();
+  bool recognizeAndInsertMemcmp();
 
   /// @}
 };
@@ -305,7 +315,7 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
 
   LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
-                         AR.MSSA, DL, ORE);
+                         AR.MSSA, DL, ORE, &AR.AC);
   if (!LIR.runOnLoop(&L))
     return PreservedAnalyses::all();
 
@@ -336,7 +346,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
   // Disable loop idiom recognition if the function's name is a common idiom.
   StringRef Name = L->getHeader()->getParent()->getName();
   if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
-      Name == "wcslen")
+      Name == "wcslen" || Name == "memcmp")
     return false;
 
   // Determine if code size heuristics need to be applied.
@@ -1698,7 +1708,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
 
   return recognizePopcount() || recognizeAndInsertFFS() ||
          recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
-         recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
+         recognizeShiftUntilLessThan() || recognizeAndInsertStrLen() ||
+         recognizeAndInsertMemcmp();
 }
 
 /// Check if the given conditional branch is based on the comparison between
@@ -3577,3 +3588,219 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
   ++NumShiftUntilZero;
   return MadeChange;
 }
+
+namespace {
+/// Matches the equality-only memcmp loop shape and reports its operands.
+class MemcmpVerifier {
+public:
+  explicit MemcmpVerifier(Loop *CurLoop, ScalarEvolution *SE, DominatorTree *DT,
+                          AssumptionCache *AC, TargetLibraryInfo *TLI,
+                          const DataLayout *DL)
+      : CurLoop(CurLoop), SE(SE), DT(DT), AC(AC), TLI(TLI), DL(DL) {}
+
+  /// Returns true if \p LI can be one side of the memcmp. On success, \p Base
+  /// and \p Step receive the start and stride of the load's address AddRec.
+  bool isLoadMemcmpOperandCandidate(LoadInst *LI, const SCEVUnknown *&Base,
+                                    const APInt *&Step) {
+    if (!LI->isSimple())
+      return false;
+
+    Value *LoadPointer = LI->getPointerOperand();
+    if (LoadPointer->getType()->getPointerAddressSpace() != 0)
+      return false;
+
+    // Comparisons of floats can't be transformed. For example, the bits of
+    // two NaN values might be equivalent, but NaN is never equal to itself.
+    // This means `memcmp` would be a behavior change from float equality.
+    IntegerType *LoadType = dyn_cast<IntegerType>(LI->getType());
+    if (!LoadType)
+      return false;
+
+    // There should be no padding between consecutive members of the integer
+    // array, as `memcmp` could give a different answer from integer equality.
+    if (DL->getTypeAllocSizeInBits(LoadType) != DL->getTypeSizeInBits(LoadType))
+      return false;
+
+    // If the Load SCEV has a non-constant step, it is clearly not part of a
+    // `memcmp` idiom. Furthermore, the step must be equal to the size of
+    // the load type, as otherwise the for loop would skip equality checks
+    // for certain bytes.
+    const SCEVUse LoadSCEV = SE->getSCEVAtScope(LoadPointer, CurLoop);
+    const SCEVUnknown *LoadBase = nullptr;
+    const APInt *LoadStep = nullptr;
+    if (!match(LoadSCEV, m_scev_AffineAddRec(m_SCEVUnknown(LoadBase),
+                                             m_scev_APInt(LoadStep))))
+      return false;
+
+    // TODO: Possibly handle negative strides.
+    if (LoadStep->isNegative() ||
+        LoadType->getBitWidth() != *LoadStep * CHAR_BIT)
+      return false;
+
+    // An AddRec step wider than size_t looks odd, but the IR seems to allow
+    // it. Give up in that degenerate case.
+    unsigned SizeTBits = TLI->getSizeTSize(*CurLoop->getHeader()->getModule());
+    if (LoadStep->getBitWidth() > SizeTBits)
+      return false;
+
+    // The load must be dereferenceable no matter how many times the loop
+    // executes; otherwise the emitted memcmp would be undefined behavior.
+    if (!llvm::isDereferenceableAndAlignedInLoop(LI, CurLoop, *SE, *DT, AC))
+      return false;
+
+    Base = LoadBase;
+    Step = LoadStep;
+    return true;
+  }
+
+  /// We are trying to detect the following memcmp-like structure:
+  ///
+  /// preheader:
+  ///   ...
+  ///   br label %body
+  ///
+  /// body:
+  ///   ... ; Both loads have equal SCEV steps and satisfy certain properties
+  ///   %lhs = load i32, ptr %lhs_ptr
+  ///   %rhs = load i32, ptr %rhs_ptr
+  ///   %equal = icmp eq i32 %lhs, %rhs
+  ///   br i1 %equal, label %cond, label %exit
+  ///
+  /// cond:
+  ///   ... ; Compute whether loop should stop using some induction variable
+  ///   br i1 %stop_loop, label %exit, label %body
+  ///
+  /// exit:
+  ///   %buffers_equal = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ///
+  /// More specifically, we expect the pairs of loads to have a SCEV expression
+  /// of the form {%buffer,+,c}, where c is a constant equal to the size in
+  /// bytes of the load type. Furthermore, we must be able to prove that
+  /// there are no padding bytes in the buffers being read from, and that the
+  /// loads are always dereferenceable up to the maximum number of times the
+  /// loop backedge is taken. The out-parameters are only set on success.
+  bool detectMemcmpIdiom(PHINode *&PhiToReplace, Value *&Lhs, Value *&Rhs,
+                         const SCEV *&Len) {
+    // Step 1: Make sure we have single header, latch and exit block.
+    // Furthermore, if the body or condition block have side effects,
+    // this is clearly not equivalent to memcmp.
+    BasicBlock *BodyBlock = CurLoop->getHeader();
+    BasicBlock *CondBlock = CurLoop->getLoopLatch();
+    BasicBlock *ExitBlock = CurLoop->getUniqueExitBlock();
+    if (!BodyBlock || !CondBlock || !ExitBlock)
+      return false;
+
+    for (const BasicBlock *BB : {BodyBlock, CondBlock})
+      for (const Instruction &I : *BB)
+        if (I.mayHaveSideEffects())
+          return false;
+
+    // Step 2: The pattern we are looking for has precisely one LCSSAPhi.
+    // Furthermore, that LCSSAPhi is constant.
+    if (std::distance(ExitBlock->phis().begin(), ExitBlock->phis().end()) != 1)
+      return false;
+
+    PHINode &Phi = *ExitBlock->phis().begin();
+    Value *Val = Phi.hasConstantValue();
+    if (!Val)
+      return false;
+
+    // Step 3: The LCSSAPhi value must be an integer equality compare that
+    // sends the body to the latch on a match and to the exit otherwise. Any
+    // other predicate (e.g. `icmp ne`) does not mean `memcmp(...) == 0`.
+    using namespace PatternMatch;
+    auto *CI = dyn_cast<ICmpInst>(Val);
+    if (!CI || CI->getPredicate() != ICmpInst::ICMP_EQ)
+      return false;
+    const CondBrInst *BodyTerminator =
+        dyn_cast<CondBrInst>(BodyBlock->getTerminator());
+    if (!BodyTerminator ||
+        !match(BodyTerminator, m_Br(m_Specific(CI), m_SpecificBB(CondBlock),
+                                    m_SpecificBB(ExitBlock))))
+      return false;
+
+    // Step 4: Verify that the compare operation is comparing two loads that
+    // satisfy the criteria checked by `isLoadMemcmpOperandCandidate()`.
+    LoadInst *LoadLHS = dyn_cast<LoadInst>(CI->getOperand(0));
+    LoadInst *LoadRHS = dyn_cast<LoadInst>(CI->getOperand(1));
+    if (!LoadLHS || !LoadRHS)
+      return false;
+
+    const SCEVUnknown *BaseLHS = nullptr;
+    const SCEVUnknown *BaseRHS = nullptr;
+    const APInt *StepLHS = nullptr;
+    const APInt *StepRHS = nullptr;
+    if (!isLoadMemcmpOperandCandidate(LoadLHS, BaseLHS, StepLHS) ||
+        !isLoadMemcmpOperandCandidate(LoadRHS, BaseRHS, StepRHS) ||
+        *StepLHS != *StepRHS)
+      return false;
+
+    // Step 5: Require a constant trip bound; the cast is null otherwise.
+    const auto *MaxBackedgeTaken = dyn_cast<SCEVConstant>(
+        SE->getBackedgeTakenCount(CurLoop, ScalarEvolution::ConstantMaximum));
+    if (!MaxBackedgeTaken)
+      return false;
+
+    // Step 6: Hand back what the caller needs to emit an equivalent memcmp.
+    PhiToReplace = &Phi;
+    Lhs = BaseLHS->getValue();
+    Rhs = BaseRHS->getValue();
+    Type *IntIdxTy = DL->getIndexType(Lhs->getType());
+    Len = getNumBytes(MaxBackedgeTaken, IntIdxTy, SE->getConstant(*StepLHS),
+                      CurLoop, DL, SE);
+    return true;
+  }
+
+private:
+  Loop *CurLoop;
+  ScalarEvolution *SE;
+  DominatorTree *DT;
+  AssumptionCache *AC;
+  const TargetLibraryInfo *TLI;
+  const DataLayout *DL;
+};
+} // namespace
+
+/// Replaces an element-wise equality loop over two buffers with an
+/// `memcmp(...) == 0` in the exit block; the loop itself is left in place.
+bool LoopIdiomRecognize::recognizeAndInsertMemcmp() {
+  if (!TLI->has(LibFunc_memcmp) || DisableLIRP::Memcmp)
+    return false;
+
+  PHINode *PhiToReplace = nullptr;
+  Value *Lhs = nullptr;
+  Value *Rhs = nullptr;
+  const SCEV *Len = nullptr;
+  MemcmpVerifier Verifier(CurLoop, SE, DT, AC, TLI, DL);
+  if (!Verifier.detectMemcmpIdiom(PhiToReplace, Lhs, Rhs, Len))
+    return false;
+
+  SCEVExpander Expander(*SE, "loop-idiom");
+  SCEVExpanderCleaner ExpCleaner(Expander);
+  if (!Expander.isSafeToExpand(Len))
+    return false;
+  // Emit right after the PHIs rather than before the terminator: the PHI's
+  // users may sit anywhere in this block and must be dominated by the result.
+  BasicBlock *PhiBlock = PhiToReplace->getParent();
+  BasicBlock::iterator InsertPt = PhiBlock->getFirstInsertionPt();
+  Value *LenArg = Expander.expandCodeFor(Len, Len->getType(), InsertPt);
+  IRBuilder<> Builder(PhiBlock, InsertPt);
+  Value *MemCmpCall = emitMemCmp(Lhs, Rhs, LenArg, Builder, *DL, TLI);
+  if (!MemCmpCall)
+    return false;
+  Value *NewCmpInst = Builder.CreateICmpEQ(
+      MemCmpCall, ConstantInt::get(MemCmpCall->getType(), 0));
+  PhiToReplace->replaceAllUsesWith(NewCmpInst);
+  RecursivelyDeleteDeadPHINode(PhiToReplace);
+
+  ++NumMemCmp;
+  LLVM_DEBUG(dbgs() << "Formed memcmp idiom:" << *MemCmpCall << "\n");
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertMemcmp",
+                              CurLoop->getStartLoc(), PhiBlock)
+           << "Transformed memcmp loop idiom";
+  });
+  ExpCleaner.markResultUsed();
+  // We'll let the loop-deletion pass handle deleting the now dead loop.
+  return true;
+}
diff --git a/llvm/test/Transforms/LoopIdiom/memcmp.ll b/llvm/test/Transforms/LoopIdiom/memcmp.ll
new file mode 100644
index 0000000000000..17d69f7736221
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/memcmp.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
+
+%memcmp_idiom_arr = type { [50 x i32] }
+define i1 @memcmp_idiom(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @memcmp_idiom(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%too_short_arr = type { [49 x i32] }
+define i1 @no_memcmp_idiom_array_too_short(ptr byval(%too_short_arr) align 8 %lhs, ptr byval(%too_short_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_array_too_short(
+; CHECK-SAME: ptr byval([[TOO_SHORT_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[TOO_SHORT_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%has_padding_arr = type { [50 x i36] }
+define i1 @no_memcmp_idiom_array_has_padding(ptr byval(%has_padding_arr) align 8 %lhs, ptr byval(%has_padding_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_array_has_padding(
+; CHECK-SAME: ptr byval([[HAS_PADDING_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[HAS_PADDING_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i36, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i36, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i36 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i36, ptr %lhs_addr, align 4
+  %rhs_val = load i36, ptr %rhs_addr, align 4
+  %equal = icmp eq i36 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%mismatched_stride_arr = type { [100 x i32] }
+define i1 @no_memcmp_idiom_mismatched_stride(ptr byval(%mismatched_stride_arr) align 8 %lhs, ptr byval(%mismatched_stride_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_mismatched_stride(
+; CHECK-SAME: ptr byval([[MISMATCHED_STRIDE_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MISMATCHED_STRIDE_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[DOUBLEINDVAR:%.*]] = mul i64 [[INDVAR]], 2
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[DOUBLEINDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %doubleindvar = mul i64 %indvar, 2
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %doubleindvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+define i1 @no_memcmp_idiom_volatile_loads(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_volatile_loads(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load volatile i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load volatile i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load volatile i32, ptr %lhs_addr, align 4
+  %rhs_val = load volatile i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%float_arr = type { [50 x float] }
+define i1 @no_memcmp_idiom_float_comparisons(ptr byval(%float_arr) align 8 %lhs, ptr byval(%float_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_float_comparisons(
+; CHECK-SAME: ptr byval([[FLOAT_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[FLOAT_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load float, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load float, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = fcmp oeq float [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load float, ptr %lhs_addr, align 4
+  %rhs_val = load float, ptr %rhs_addr, align 4
+  %equal = fcmp oeq float %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+define i1 @no_memcmp_idiom_cond_block_side_effect(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_cond_block_side_effect(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %ptr = alloca i8
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+define i1 @no_memcmp_idiom_body_block_side_effect(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_body_block_side_effect(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %ptr = alloca i8
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+define i1 @no_memcmp_idiom_non_constant_loop_bound(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs, i64 %max) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_non_constant_loop_bound(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]], i64 [[MAX:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], [[MAX]]
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, %max
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %ptr = alloca i8
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}



More information about the llvm-commits mailing list