[llvm] [Transforms] Recognize memcmp-like loops in LoopIdiomRecognize (PR #181562)

Sayan Sivakumaran via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 15 10:50:12 PST 2026


https://github.com/sivakusayan created https://github.com/llvm/llvm-project/pull/181562

Closes #167389

>From fa3469766eebabdb520dad3c892cacf04e9d9665 Mon Sep 17 00:00:00 2001
From: Sayan Sivakumaran <sivakusayan at gmail.com>
Date: Tue, 20 Jan 2026 17:37:02 -0600
Subject: [PATCH] Prototype for recognizing memcmp idiom

---
 .../Transforms/Scalar/LoopIdiomRecognize.h    |   3 +
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 307 +++++++++++++++++-
 llvm/test/Transforms/LoopIdiom/memcmp.ll      | 250 ++++++++++++++
 3 files changed, 548 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/memcmp.ll

diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 109b4520878cb..ddd190bb68c2f 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -43,6 +43,9 @@ struct DisableLIRP {
 
   /// When true, HashRecognize is disabled.
   static bool HashRecognize;
+
+  /// When true, Memcmp is disabled.
+  static bool Memcmp;
 };
 
 /// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 074cc73d53080..e2be9f82d50fc 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -20,8 +20,6 @@
 //
 // TODO List:
 //
-// Future loop memory idioms to recognize: memcmp, etc.
-//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
@@ -40,6 +38,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/HashRecognize.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -53,6 +52,7 @@
 #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -100,6 +100,7 @@ using namespace SCEVPatternMatch;
 STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
 STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
+STATISTIC(NumMemCmp, "Number of uncountable loops recognized as memcmp idiom");
 STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
 STATISTIC(
     NumShiftUntilBitTest,
@@ -131,6 +132,14 @@ static cl::opt<bool, true>
                       cl::location(DisableLIRP::Memcpy), cl::init(false),
                       cl::ReallyHidden);
 
+bool DisableLIRP::Memcmp;
+static cl::opt<bool, true>
+    DisableLIRPMemcmp("disable-loop-idiom-memcmp",
+                      cl::desc("Proceed with loop idiom recognize pass, but do "
+                               "not convert loop(s) to memcmp."),
+                      cl::location(DisableLIRP::Memcmp), cl::init(false),
+                      cl::ReallyHidden);
+
 bool DisableLIRP::Strlen;
 static cl::opt<bool, true>
     DisableLIRPStrlen("disable-loop-idiom-strlen",
@@ -182,17 +191,17 @@ class LoopIdiomRecognize {
   const TargetTransformInfo *TTI;
   const DataLayout *DL;
   OptimizationRemarkEmitter &ORE;
+  AssumptionCache *AC;
   bool ApplyCodeSizeHeuristics;
   std::unique_ptr<MemorySSAUpdater> MSSAU;
 
 public:
-  explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
-                              LoopInfo *LI, ScalarEvolution *SE,
-                              TargetLibraryInfo *TLI,
-                              const TargetTransformInfo *TTI, MemorySSA *MSSA,
-                              const DataLayout *DL,
-                              OptimizationRemarkEmitter &ORE)
-      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {
+  explicit LoopIdiomRecognize(
+      AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+      TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, MemorySSA *MSSA,
+      const DataLayout *DL, OptimizationRemarkEmitter &ORE, AssumptionCache *AC)
+      : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE),
+        AC(AC) {
     if (MSSA)
       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
   }
@@ -286,6 +295,7 @@ class LoopIdiomRecognize {
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
   bool recognizeAndInsertStrLen();
+  bool recognizeAndInsertMemcmp();
 
   /// @}
 };
@@ -305,7 +315,7 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
 
   LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,
-                         AR.MSSA, DL, ORE);
+                         AR.MSSA, DL, ORE, &AR.AC);
   if (!LIR.runOnLoop(&L))
     return PreservedAnalyses::all();
 
@@ -336,7 +346,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
   // Disable loop idiom recognition if the function's name is a common idiom.
   StringRef Name = L->getHeader()->getParent()->getName();
   if (Name == "memset" || Name == "memcpy" || Name == "strlen" ||
-      Name == "wcslen")
+      Name == "wcslen" || Name == "memcmp")
     return false;
 
   // Determine if code size heuristics need to be applied.
@@ -1698,7 +1708,8 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
 
   return recognizePopcount() || recognizeAndInsertFFS() ||
          recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
-         recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
+         recognizeShiftUntilLessThan() || recognizeAndInsertStrLen() ||
+         recognizeAndInsertMemcmp();
 }
 
 /// Check if the given conditional branch is based on the comparison between
@@ -3589,3 +3600,275 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
   ++NumShiftUntilZero;
   return MadeChange;
 }
+
+namespace {
+class MemcmpVerifier {
+public:
+  explicit MemcmpVerifier(Loop *CurLoop, ScalarEvolution *SE, DominatorTree *DT,
+                          AssumptionCache *AC, TargetLibraryInfo *TLI,
+                          const DataLayout *DL)
+      : CurLoop(CurLoop), SE(SE), DT(DT), AC(AC), TLI(TLI), DL(DL) {}
+
+  bool isLoadMemcmpOperandCandidate(LoadInst *LI, const SCEVUnknown *&Base,
+                                    const APInt *&Offset, const APInt *&Step) {
+    if (!LI->isSimple())
+      return false;
+
+    Value *LoadPointer = LI->getPointerOperand();
+    if (LoadPointer->getType()->getPointerAddressSpace() != 0)
+      return false;
+
+    // Comparisons of floats can't be transformed. For example, the bits of
+    // two NaN values might be equivalent, but NaN is never equal to itself.
+    // This means `memcmp` would be a behavior change from float equality.
+    IntegerType *LoadType = dyn_cast<IntegerType>(LI->getType());
+    if (!LoadType)
+      return false;
+
+    // There should be no padding between consecutive members of the integer
+    // array, as `memcmp` could give a different answer from integer equality.
+    if (DL->getTypeAllocSizeInBits(LoadType) != DL->getTypeSizeInBits(LoadType))
+      return false;
+
+    // If the Load SCEV has a non-constant step, it is clearly not part of a
+    // `memcmp` idiom. Furthermore, the step must be equal to the size of
+    // the load type, as otherwise the for loop would skip equality checks
+    // for certain bytes.
+    const SCEV *LoadSCEV = SE->getSCEVAtScope(LoadPointer, CurLoop);
+    const SCEVUnknown *LoadBase;
+    const APInt *LoadStep;
+    const APInt *LoadOffset = 0;
+    if (!match(LoadSCEV, m_scev_AffineAddRec(m_SCEVUnknown(LoadBase),
+                                             m_scev_APInt(LoadStep))) &&
+        !match(LoadSCEV,
+               m_scev_AffineAddRec(m_scev_Add(m_scev_APInt(LoadOffset),
+                                              m_SCEVUnknown(LoadBase)),
+                                   m_scev_APInt(LoadStep)))) {
+      return false;
+    }
+    // TODO: Possibly handle negative strides.
+    if (LoadType->getBitWidth() != *LoadStep * 8 || LoadStep->isNegative())
+      return false;
+
+    // Strangely the IR seems to allow this. Guess we'll just give up in this
+    // degenerate case.
+    unsigned SizeTBits = TLI->getSizeTSize(*CurLoop->getHeader()->getModule());
+    if (LoadStep->getBitWidth() > SizeTBits)
+      return false;
+
+    // The load must be dereferenceable no matter how many times the loop
+    // executes. Otherwise, creating a memcmp for this loop is undefined
+    // behavior.
+    if (!llvm::isDereferenceableAndAlignedInLoop(LI, CurLoop, *SE, *DT, AC))
+      return false;
+
+    Base = LoadBase;
+    Offset = LoadOffset;
+    Step = LoadStep;
+    return true;
+  }
+
+  /// We are trying to detect the following memcmp-like structure:
+  ///
+  /// preheader:
+  ///   ...
+  ///   br label %body
+  ///
+  /// body:
+  ///   ... ; Both loads have equal SCEV steps and satisfy certain properties
+  ///   %lhs = load i32, ptr %lhs_ptr
+  ///   %rhs = load i32, ptr %rhs_ptr
+  ///   %equal = icmp eq i32 %lhs, %rhs
+  ///   br i1 %equal, label %cond, label %exit
+  ///
+  /// cond:
+  ///   ... ; Compute whether loop should stop using some induction variable
+  ///   br i1 %stop_loop, label %exit, label %body
+  ///
+  /// exit:
+  ///   %buffers_equal = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ///
+  /// More specifically, we expect the pairs of loads to have a SCEV expression
+  /// of the form {%buffer,+,c}, where c is a ConstantInt that is equal to the
+  /// bitwidth of the load type. Furthermore, we must be able to prove that
+  /// there are no padding bytes in the buffers being read from, and that the
+  /// loads are always dereferenceable up to the maximum number of times the
+  /// loop backedge is taken.
+  bool detectMemcmpIdiom(PHINode *&PhiToReplace, CmpInst::Predicate &Pred,
+                         Value *&Lhs, APInt &OffsetLhs, Value *&Rhs,
+                         APInt &OffsetRhs, APInt &Len) {
+    // Step 1: Make sure the loop is in a nice enough form for analysis.
+    // For simplicity, we'll only transform loops that we can compute a
+    // maximum backedge taken count for.
+    if (!CurLoop->isLoopSimplifyForm() || !CurLoop->isLCSSAForm(*DT) ||
+        !CurLoop->getUniqueExitBlock())
+      return false;
+
+    const SCEVConstant *MaxBackedgeTaken =
+        llvm::dyn_cast<SCEVConstant>(SE->getBackedgeTakenCount(
+            CurLoop, llvm::ScalarEvolution::ConstantMaximum));
+    if (!MaxBackedgeTaken)
+      return false;
+
+    BasicBlock *LoopBody = *CurLoop->block_begin();
+    for (Instruction &I : *LoopBody)
+      if (I.mayHaveSideEffects())
+        return false;
+
+    // Step 2: Look at our LCSSAPhis, and make sure the only thing being
+    // used is the equality check of the buffers. Otherwise, it's not
+    // immediately obvious that extracting a memcmp from a loop that can't
+    // be eliminated is beneficial.
+    BasicBlock *ExitBlock = CurLoop->getUniqueExitBlock();
+    if (!ExitBlock ||
+        std::distance(ExitBlock->phis().begin(), ExitBlock->phis().end()) != 1)
+      return false;
+
+    PHINode &Phi = *ExitBlock->phis().begin();
+    Value *LCSSAVal = Phi.hasConstantValue();
+    if (Phi.getNumIncomingValues() != 2 || !LCSSAVal)
+      return false;
+
+    // Step 3: Verify that the value being used in the LCSSAPhi is actually
+    // a compare operation, and that the branch operation looks correct.
+    using namespace PatternMatch;
+    CmpInst *CI = dyn_cast<CmpInst>(LCSSAVal);
+    BasicBlock *CompareBlock = CI->getParent();
+    BasicBlock *ConditionBlock = *llvm::find_if(
+        Phi.blocks(), [&](BasicBlock *Block) { return Block != CompareBlock; });
+    assert(CompareBlock && ConditionBlock);
+
+    BranchInst *CompareBlockBranch =
+        dyn_cast<BranchInst>(CompareBlock->getTerminator());
+    CmpInst::Predicate CompareBlockPred;
+    if (match(CompareBlockBranch,
+              m_Br(m_SpecificICmp(CmpInst::ICMP_EQ, m_Value(), m_Value()),
+                   m_Specific(ConditionBlock), m_Specific(ExitBlock)))) {
+      CompareBlockPred = CmpInst::ICMP_EQ;
+    } else if (match(
+                   CompareBlockBranch,
+                   m_Br(m_SpecificICmp(CmpInst::ICMP_NE, m_Value(), m_Value()),
+                        m_Specific(ExitBlock), m_Specific(ConditionBlock)))) {
+      CompareBlockPred = CmpInst::ICMP_NE;
+    } else {
+      return false;
+    }
+
+    // Step 4: Look into the condition block of the loop, and verify that
+    // it has the expected pattern of a traditional loop. Note that we rely
+    // on IndVar simplification having been run to detect this.
+    Value *IndVar = CurLoop->getInductionVariable(*SE);
+    if (!match(ConditionBlock->getTerminator(),
+               m_Br(m_SpecificICmp(CmpInst::ICMP_EQ,
+                                   m_Add(m_Specific(IndVar), m_One()),
+                                   m_ConstantInt()),
+                    m_Specific(ExitBlock), m_Specific(LoopBody)))) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " Bad backedge structure.\n");
+      return false;
+    }
+
+    // Step 5: Verify that the compare operation is comparing two
+    // pointers satisfying certain criteria. See
+    // `isLoadMemcmpOperandCandidate()` for more information.
+    LoadInst *LoadLHS = dyn_cast<LoadInst>(CI->getOperand(0));
+    LoadInst *LoadRHS = dyn_cast<LoadInst>(CI->getOperand(1));
+    if (!LoadLHS || !LoadRHS) {
+      return false;
+    }
+
+    const SCEVUnknown *BaseLHS;
+    const SCEVUnknown *BaseRHS;
+    const APInt *OffsetLHS;
+    const APInt *OffsetRHS;
+    const APInt *StepLHS;
+    const APInt *StepRHS;
+    if (!isLoadMemcmpOperandCandidate(LoadLHS, BaseLHS, OffsetLHS, StepLHS) ||
+        !isLoadMemcmpOperandCandidate(LoadRHS, BaseRHS, OffsetRHS, StepRHS))
+      return false;
+    if (*StepLHS != *StepRHS)
+      return false;
+
+    // Step 6: Celebrate! This transform is safe to be done, so return
+    // the information needed for the caller to emit an equivalent
+    // memcmp.
+    unsigned SizeTBits = TLI->getSizeTSize(*CurLoop->getHeader()->getModule());
+    APInt ArraySize = (MaxBackedgeTaken->getAPInt() + 1).zext(SizeTBits);
+    PhiToReplace = Φ
+    Pred = CompareBlockPred;
+    Lhs = BaseLHS->getValue();
+    Rhs = BaseRHS->getValue();
+    if (OffsetLHS) {
+      OffsetLhs = *OffsetLHS;
+    }
+    if (OffsetRHS) {
+      OffsetRhs = *OffsetRHS;
+    }
+    Len = ArraySize * StepLHS->zext(SizeTBits);
+    return true;
+  }
+
+private:
+  Loop *CurLoop;
+  ScalarEvolution *SE;
+  DominatorTree *DT;
+  AssumptionCache *AC;
+  const TargetLibraryInfo *TLI;
+  const DataLayout *DL;
+};
+} // namespace
+
+// TODO: Explain what's happening here a bit more.
+bool LoopIdiomRecognize::recognizeAndInsertMemcmp() {
+  if (!TLI->has(LibFunc_memcmp) || DisableLIRPMemcmp)
+    return false;
+
+  PHINode *PhiToReplace;
+  CmpInst::Predicate Pred;
+  Value *Lhs;
+  APInt OffsetLhs;
+  Value *Rhs;
+  APInt OffsetRhs;
+  APInt Len;
+  MemcmpVerifier Verifier(CurLoop, SE, DT, AC, TLI, DL);
+  if (!Verifier.detectMemcmpIdiom(PhiToReplace, Pred, Lhs, OffsetLhs, Rhs,
+                                  OffsetRhs, Len))
+    return false;
+
+  BasicBlock *PhiBlock = PhiToReplace->getParent();
+  IRBuilder<> Builder(PhiBlock);
+  Builder.SetInsertPoint(PhiBlock->getFirstNonPHIIt());
+
+  // We'll need to create registers holding the offsetted pointer using geps
+  if (!OffsetLhs.isZero()) {
+    Lhs = Builder.CreateGEP(
+        Type::getInt8Ty(Builder.getContext()), Lhs,
+        ConstantInt::get(Type::getInt64Ty(Builder.getContext()), OffsetLhs));
+  }
+  if (!OffsetRhs.isZero()) {
+    Rhs = Builder.CreateGEP(
+        Type::getInt8Ty(Builder.getContext()), Rhs,
+        ConstantInt::get(Type::getInt64Ty(Builder.getContext()), OffsetRhs));
+  }
+
+  Value *MemCmpCall = llvm::emitMemCmp(
+      Lhs, Rhs, llvm::ConstantInt::get(Builder.getContext(), Len), Builder, *DL,
+      TLI);
+  Value *NewCmpInst = Builder.CreateCmp(
+      Pred, MemCmpCall,
+      llvm::ConstantInt::get(
+          Builder.getContext(),
+          llvm::APInt(MemCmpCall->getType()->getPrimitiveSizeInBits(), 0)));
+  PhiToReplace->replaceAllUsesWith(NewCmpInst);
+  RecursivelyDeleteDeadPHINode(PhiToReplace);
+
+  ++NumMemCmp;
+  LLVM_DEBUG(dbgs() << "Formed memcmp idiom:" << *MemCmpCall << "\n");
+  ORE.emit([&]() {
+    return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertMemcmp",
+                              CurLoop->getStartLoc(), PhiBlock)
+           << "Transformed memcmp loop idiom";
+  });
+
+  // We'll let the loop-deletion pass handle deleting the now dead loop.
+  return true;
+}
diff --git a/llvm/test/Transforms/LoopIdiom/memcmp.ll b/llvm/test/Transforms/LoopIdiom/memcmp.ll
new file mode 100644
index 0000000000000..439367f41295b
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/memcmp.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
+
+%memcmp_idiom_arr = type { [50 x i32] }
+define i1 @memcmp_idiom(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @memcmp_idiom(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[LHS]], ptr [[RHS]], i64 200)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%too_short_arr = type { [49 x i32] }
+define i1 @no_memcmp_idiom_array_too_short(ptr byval(%too_short_arr) align 8 %lhs, ptr byval(%too_short_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_array_too_short(
+; CHECK-SAME: ptr byval([[TOO_SHORT_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[TOO_SHORT_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%has_padding_arr = type { [50 x i36] }
+define i1 @no_memcmp_idiom_array_has_padding(ptr byval(%has_padding_arr) align 8 %lhs, ptr byval(%has_padding_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_array_has_padding(
+; CHECK-SAME: ptr byval([[HAS_PADDING_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[HAS_PADDING_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i36, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i36, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i36 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RET:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i36, ptr %lhs_addr, align 4
+  %rhs_val = load i36, ptr %rhs_addr, align 4
+  %equal = icmp eq i36 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%mismatched_stride_arr = type { [100 x i32] }
+define i1 @no_memcmp_idiom_mismatched_stride(ptr byval(%mismatched_stride_arr) align 8 %lhs, ptr byval(%mismatched_stride_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_mismatched_stride(
+; CHECK-SAME: ptr byval([[MISMATCHED_STRIDE_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MISMATCHED_STRIDE_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[DOUBLEINDVAR:%.*]] = mul i64 [[INDVAR]], 2
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[DOUBLEINDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %doubleindvar = mul i64 %indvar, 2
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %doubleindvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load i32, ptr %lhs_addr, align 4
+  %rhs_val = load i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+define i1 @no_memcmp_idiom_volatile_loads(ptr byval(%memcmp_idiom_arr) align 8 %lhs, ptr byval(%memcmp_idiom_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_volatile_loads(
+; CHECK-SAME: ptr byval([[MEMCMP_IDIOM_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[MEMCMP_IDIOM_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load volatile i32, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load volatile i32, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = icmp eq i32 [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load volatile i32, ptr %lhs_addr, align 4
+  %rhs_val = load volatile i32, ptr %rhs_addr, align 4
+  %equal = icmp eq i32 %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}
+
+%float_arr = type { [50 x float] }
+define i1 @no_memcmp_idiom_float_comparisons(ptr byval(%float_arr) align 8 %lhs, ptr byval(%float_arr) align 8 %rhs) {
+; CHECK-LABEL: define i1 @no_memcmp_idiom_float_comparisons(
+; CHECK-SAME: ptr byval([[FLOAT_ARR:%.*]]) align 8 [[LHS:%.*]], ptr byval([[FLOAT_ARR]]) align 8 [[RHS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[BODY:.*]]
+; CHECK:       [[COND:.*]]:
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i64 [[INDVAR:%.*]], 1
+; CHECK-NEXT:    [[STOP_LOOP:%.*]] = icmp eq i64 [[INC]], 50
+; CHECK-NEXT:    br i1 [[STOP_LOOP]], label %[[EXIT:.*]], label %[[BODY]]
+; CHECK:       [[BODY]]:
+; CHECK-NEXT:    [[INDVAR]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC]], %[[COND]] ]
+; CHECK-NEXT:    [[LHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[LHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[RHS_ADDR:%.*]] = getelementptr inbounds nuw i32, ptr [[RHS]], i64 [[INDVAR]]
+; CHECK-NEXT:    [[LHS_VAL:%.*]] = load float, ptr [[LHS_ADDR]], align 4
+; CHECK-NEXT:    [[RHS_VAL:%.*]] = load float, ptr [[RHS_ADDR]], align 4
+; CHECK-NEXT:    [[EQUAL:%.*]] = fcmp oeq float [[LHS_VAL]], [[RHS_VAL]]
+; CHECK-NEXT:    br i1 [[EQUAL]], label %[[COND]], label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ [[EQUAL]], %[[BODY]] ], [ [[EQUAL]], %[[COND]] ]
+; CHECK-NEXT:    ret i1 [[TMP0]]
+;
+entry:
+  br label %body
+cond:
+  %inc = add nuw nsw i64 %indvar, 1
+  %stop_loop = icmp eq i64 %inc, 50
+  br i1 %stop_loop, label %exit, label %body
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %inc, %cond ]
+  %lhs_addr = getelementptr inbounds nuw i32, ptr %lhs, i64 %indvar
+  %rhs_addr = getelementptr inbounds nuw i32, ptr %rhs, i64 %indvar
+  %lhs_val = load float, ptr %lhs_addr, align 4
+  %rhs_val = load float, ptr %rhs_addr, align 4
+  %equal = fcmp oeq float %lhs_val, %rhs_val
+  br i1 %equal, label %cond, label %exit
+exit:
+  %ret = phi i1 [ %equal, %body ], [ %equal, %cond ]
+  ret i1 %ret
+}



More information about the llvm-commits mailing list