[llvm] 6b94870 - [AggressiveInstCombine] Inline strcmp/strncmp (#89371)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 2 21:24:42 PDT 2024


Author: Franklin Zhang
Date: 2024-05-03T13:24:38+09:00
New Revision: 6b948705a05261a2ff31cd7e6ea8319d1852ddfc

URL: https://github.com/llvm/llvm-project/commit/6b948705a05261a2ff31cd7e6ea8319d1852ddfc
DIFF: https://github.com/llvm/llvm-project/commit/6b948705a05261a2ff31cd7e6ea8319d1852ddfc.diff

LOG: [AggressiveInstCombine] Inline strcmp/strncmp (#89371)

Inline calls to strcmp(s1, s2) and strncmp(s1, s2, N), where N and
exactly one of s1 and s2 are constant.

For example:

```c
int res = strcmp(s, "ab");
```

is converted to

```c
int res = (int)s[0] - (int)'a';
if (res != 0)
  goto END;
res = (int)s[1] - (int)'b';
if (res != 0)
  goto END;
res = (int)s[2] - (int)'\0';
END:
```

Ported from a similar gcc feature [Inline strcmp with small constant
strings](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78809).

Added: 
    llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
    llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll

Modified: 
    llvm/include/llvm/Analysis/ValueTracking.h
    llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Removed: 
    llvm/test/Transforms/AggressiveInstCombine/strcmp.ll


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index afd18e7e56ba0c..0584b7e29f67b9 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -117,6 +117,8 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
                             const DominatorTree *DT = nullptr,
                             bool UseInstrInfo = true);
 
+bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
+
 bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
 
 /// Return true if the given value is known to be non-zero when defined. For

diff  --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 2d76546316fafb..3568417510f107 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -8,7 +8,7 @@
 /// \file
 ///
 /// AggressiveInstCombiner - Combine expression patterns to form expressions
-/// with fewer, simple instructions. This pass does not modify the CFG.
+/// with fewer, simple instructions.
 ///
 //===----------------------------------------------------------------------===//
 

diff  --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index fed2061aae3a0d..0dbb39d7c8ec46 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -252,6 +252,13 @@ bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
                                         RHSCache.getKnownBits(SQ));
 }
 
+bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
+  return !I->user_empty() && all_of(I->users(), [](const User *U) {
+    ICmpInst::Predicate P;
+    return match(U, m_ICmp(P, m_Value(), m_Zero()));
+  });
+}
+
 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
   return !I->user_empty() && all_of(I->users(), [](const User *U) {
     ICmpInst::Predicate P;

diff  --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index e586e9eda1322f..39eca4f41ec57f 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -28,6 +29,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 
@@ -47,6 +49,11 @@ static cl::opt<unsigned> MaxInstrsToScan(
     "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
     cl::desc("Max number of instructions to scan for aggressive instcombine."));
 
+static cl::opt<unsigned> StrNCmpInlineThreshold(
+    "strncmp-inline-threshold", cl::init(3), cl::Hidden,
+    cl::desc("The maximum length of a constant string for a builtin string cmp "
+             "call eligible for inlining. The default value is 3."));
+
 /// Match a pattern for a bitwise funnel/rotate operation that partially guards
 /// against undefined behavior by branching around the funnel-shift/rotation
 /// when the shift amount is 0.
@@ -73,7 +80,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
                      m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
                      m_LShr(m_Value(ShVal1),
                             m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
-        return Intrinsic::fshl;
+      return Intrinsic::fshl;
     }
 
     // fshr(ShVal0, ShVal1, ShAmt)
@@ -82,7 +89,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
               m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
                                                            m_Value(ShAmt))),
                               m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
-        return Intrinsic::fshr;
+      return Intrinsic::fshr;
     }
 
     return Intrinsic::not_intrinsic;
@@ -399,21 +406,11 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
 /// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
 /// pessimistic codegen that has to account for setting errno and can enable
 /// vectorization.
-static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
                      TargetLibraryInfo &TLI, AssumptionCache &AC,
                      DominatorTree &DT) {
-  // Match a call to sqrt mathlib function.
-  auto *Call = dyn_cast<CallInst>(&I);
-  if (!Call)
-    return false;
 
   Module *M = Call->getModule();
-  LibFunc Func;
-  if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
-    return false;
-
-  if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
-    return false;
 
   // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
   // (because NNAN or the operand arg must not be less than -0.0) and (2) we
@@ -426,18 +423,18 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
   if (TTI.haveFastSqrt(Ty) &&
       (Call->hasNoNaNs() ||
        cannotBeOrderedLessThanZero(
-           Arg, 0, SimplifyQuery(M->getDataLayout(), &TLI, &DT, &AC, &I)))) {
-    IRBuilder<> Builder(&I);
+           Arg, 0, SimplifyQuery(M->getDataLayout(), &TLI, &DT, &AC, Call)))) {
+    IRBuilder<> Builder(Call);
     IRBuilderBase::FastMathFlagGuard Guard(Builder);
     Builder.setFastMathFlags(Call->getFastMathFlags());
 
     Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
     Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
-    I.replaceAllUsesWith(NewSqrt);
+    Call->replaceAllUsesWith(NewSqrt);
 
     // Explicitly erase the old call because a call with side effects is not
     // trivially dead.
-    I.eraseFromParent();
+    Call->eraseFromParent();
     return true;
   }
 
@@ -922,13 +919,232 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
   return true;
 }
 
+namespace {
+class StrNCmpInliner {
+public:
+  StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
+                 const DataLayout &DL)
+      : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
+
+  bool optimizeStrNCmp();
+
+private:
+  void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
+
+  CallInst *CI;
+  LibFunc Func;
+  DomTreeUpdater *DTU;
+  const DataLayout &DL;
+};
+
+} // namespace
+
+/// First we normalize calls to strncmp/strcmp to the form of
+/// compare(s1, s2, N), which means comparing the first N bytes of s1 and s2
+/// (without considering '\0').
+///
+/// Examples:
+///
+/// \code
+///   strncmp(s, "a", 3) -> compare(s, "a", 2)
+///   strncmp(s, "abc", 3) -> compare(s, "abc", 3)
+///   strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
+///   strcmp(s, "a") -> compare(s, "a", 2)
+///
+///   char s2[] = {'a'}
+///   strncmp(s, s2, 3) -> compare(s, s2, 3)
+///
+///   char s2[] = {'a', 'b', 'c', 'd'}
+///   strncmp(s, s2, 3) -> compare(s, s2, 3)
+/// \endcode
+///
+/// We only handle cases where N and exactly one of s1 and s2 are constant.
+/// Cases where s1 and s2 are both constant are already handled by the
+/// instcombine pass.
+///
+/// We do not handle cases where N > StrNCmpInlineThreshold.
+///
+/// We also do not handle cases where N < 2, which are already
+/// handled by the instcombine pass.
+///
+bool StrNCmpInliner::optimizeStrNCmp() {
+  if (StrNCmpInlineThreshold < 2)
+    return false;
+
+  if (!isOnlyUsedInZeroComparison(CI))
+    return false;
+
+  Value *Str1P = CI->getArgOperand(0);
+  Value *Str2P = CI->getArgOperand(1);
+  // Should be handled elsewhere.
+  if (Str1P == Str2P)
+    return false;
+
+  StringRef Str1, Str2;
+  bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
+  bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
+  if (HasStr1 == HasStr2)
+    return false;
+
+  // Note that '\0' and characters after it are not trimmed.
+  StringRef Str = HasStr1 ? Str1 : Str2;
+  Value *StrP = HasStr1 ? Str2P : Str1P;
+
+  size_t Idx = Str.find('\0');
+  uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
+  if (Func == LibFunc_strncmp) {
+    if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+      N = std::min(N, ConstInt->getZExtValue());
+    else
+      return false;
+  }
+  // Now N means how many bytes we need to compare at most.
+  if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
+    return false;
+
+  // Cases where StrP has two or more dereferenceable bytes might be better
+  // optimized elsewhere.
+  bool CanBeNull = false, CanBeFreed = false;
+  if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
+    return false;
+  inlineCompare(StrP, Str, N, HasStr1);
+  return true;
+}
+
+/// Convert
+///
+/// \code
+///   ret = compare(s1, s2, N)
+/// \endcode
+///
+/// into
+///
+/// \code
+///   ret = (int)s1[0] - (int)s2[0]
+///   if (ret != 0)
+///     goto NE
+///   ...
+///   ret = (int)s1[N-2] - (int)s2[N-2]
+///   if (ret != 0)
+///     goto NE
+///   ret = (int)s1[N-1] - (int)s2[N-1]
+///   NE:
+/// \endcode
+///
+/// CFG before and after the transformation:
+///
+/// (before)
+/// BBCI
+///
+/// (after)
+/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
+///                 |                    ^
+///                 E                    |
+///                 |                    |
+///        BBSubs[1] (sub,icmp) --NE-----+
+///                ...                   |
+///        BBSubs[N-1]    (sub) ---------+
+///
+void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
+                                   bool Swapped) {
+  auto &Ctx = CI->getContext();
+  IRBuilder<> B(Ctx);
+
+  BasicBlock *BBCI = CI->getParent();
+  BasicBlock *BBTail =
+      SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
+
+  SmallVector<BasicBlock *> BBSubs;
+  for (uint64_t I = 0; I < N; ++I)
+    BBSubs.push_back(
+        BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
+  BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
+
+  cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
+
+  B.SetInsertPoint(BBNE);
+  PHINode *Phi = B.CreatePHI(CI->getType(), N);
+  B.CreateBr(BBTail);
+
+  Value *Base = LHS;
+  for (uint64_t i = 0; i < N; ++i) {
+    B.SetInsertPoint(BBSubs[i]);
+    Value *VL =
+        B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
+                                  B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
+                     CI->getType());
+    Value *VR = ConstantInt::get(CI->getType(), RHS[i]);
+    Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
+    if (i < N - 1)
+      B.CreateCondBr(B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)),
+                     BBNE, BBSubs[i + 1]);
+    else
+      B.CreateBr(BBNE);
+
+    Phi->addIncoming(Sub, BBSubs[i]);
+  }
+
+  CI->replaceAllUsesWith(Phi);
+  CI->eraseFromParent();
+
+  if (DTU) {
+    SmallVector<DominatorTree::UpdateType, 8> Updates;
+    Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
+    for (uint64_t i = 0; i < N; ++i) {
+      if (i < N - 1)
+        Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
+      Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
+    }
+    Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
+    Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
+    DTU->applyUpdates(Updates);
+  }
+}
+
+static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
+                         TargetLibraryInfo &TLI, AssumptionCache &AC,
+                         DominatorTree &DT, const DataLayout &DL,
+                         bool &MadeCFGChange) {
+
+  auto *CI = dyn_cast<CallInst>(&I);
+  if (!CI || CI->isNoBuiltin())
+    return false;
+
+  Function *CalledFunc = CI->getCalledFunction();
+  if (!CalledFunc)
+    return false;
+
+  LibFunc LF;
+  if (!TLI.getLibFunc(*CalledFunc, LF) ||
+      !isLibFuncEmittable(CI->getModule(), &TLI, LF))
+    return false;
+
+  DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+  switch (LF) {
+  case LibFunc_sqrt:
+  case LibFunc_sqrtf:
+  case LibFunc_sqrtl:
+    return foldSqrt(CI, LF, TTI, TLI, AC, DT);
+  case LibFunc_strcmp:
+  case LibFunc_strncmp:
+    if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
+      MadeCFGChange = true;
+      return true;
+    }
+    break;
+  default:;
+  }
+  return false;
+}
+
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                 TargetTransformInfo &TTI,
                                 TargetLibraryInfo &TLI, AliasAnalysis &AA,
-                                AssumptionCache &AC) {
+                                AssumptionCache &AC, bool &MadeCFGChange) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
@@ -953,7 +1169,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
       // NOTE: This function introduces erasing of the instruction `I`, so it
       // needs to be called at the end of this sequence, otherwise we may make
       // bugs.
-      MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
+      MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
     }
   }
 
@@ -969,12 +1185,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
 /// handled in the callers of this function.
 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, DominatorTree &DT,
-                    AliasAnalysis &AA) {
+                    AliasAnalysis &AA, bool &MadeCFGChange) {
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   TruncInstCombine TIC(AC, TLI, DL, DT);
   MadeChange |= TIC.run(F);
-  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
+  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
   return MadeChange;
 }
 
@@ -985,12 +1201,16 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
-  if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
+  bool MadeCFGChange = false;
+  if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
   }
   // Mark all the analyses that instcombine updates as preserved.
   PreservedAnalyses PA;
-  PA.preserveSet<CFGAnalyses>();
+  if (MadeCFGChange)
+    PA.preserve<DominatorTreeAnalysis>();
+  else
+    PA.preserveSet<CFGAnalyses>();
   return PA;
 }

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/strcmp.ll b/llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
deleted file mode 100644
index 99dd450e6f44e6..00000000000000
--- a/llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
+++ /dev/null
@@ -1,219 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
-
-declare i32 @strcmp(ptr, ptr)
-
-@s0 = constant [1 x i8] c"\00"
-@s1 = constant [2 x i8] c"0\00"
-@s2 = constant [3 x i8] c"01\00"
-@s3 = constant [4 x i8] c"012\00"
-@s4 = constant [5 x i8] c"0123\00"
-
-; Expand strcmp(C, "x"), strcmp(C, "xy").
-
-define i1 @expand_strcmp_s0(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s0(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s0)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s0)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_eq_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_eq_s1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_eq_s1_commuted(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_eq_s1_commuted(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr @s1, ptr [[C:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr @s1, ptr %C)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_ne_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_ne_s1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp ne i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sgt_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sgt_s1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp sgt i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sge_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sge_s1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp sge i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_slt_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_slt_s1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp slt i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sle_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sle_s1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp sle i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s1_fail_1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s1_fail_1(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 1
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  %cmp = icmp eq i32 %call, 1
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s1_fail_2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s1_fail_2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr @s1, ptr @s1)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr @s1, ptr @s1)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}
-
-define i32 @expand_strcmp_s1_fail_3(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s1_fail_3(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT:    ret i32 [[CALL]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s1)
-  ret i32 %call
-}
-
-define i1 @expand_strcmp_eq_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_eq_s2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s2)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_ne_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_ne_s2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s2)
-  %cmp = icmp ne i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sgt_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sgt_s2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s2)
-  %cmp = icmp sgt i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sge_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sge_s2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s2)
-  %cmp = icmp sge i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_slt_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_slt_s2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s2)
-  %cmp = icmp slt i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sle_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sle_s2(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s2)
-  %cmp = icmp sle i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s3(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s3(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s3)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s3)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s4(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s4(
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s4)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT:    ret i1 [[CMP]]
-;
-  %call = call i32 @strcmp(ptr %C, ptr @s4)
-  %cmp = icmp eq i32 %call, 0
-  ret i1 %cmp
-}

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
new file mode 100644
index 00000000000000..f3f88663672fe2
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=aggressive-instcombine < %s | FileCheck %s
+
+; check whether we generate the right IR
+
+declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
+declare i32 @strcmp(ptr nocapture, ptr nocapture)
+
+@s2 = constant [2 x i8] c"a\00"
+@s3 = constant [3 x i8] c"ab\00"
+
+define i1 @test_strncmp_1(ptr %s) {
+; CHECK-LABEL: define i1 @test_strncmp_1(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SUB:%.*]]
+; CHECK:       sub_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK:       sub_1:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT:    br label [[NE]]
+; CHECK:       ne:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ]
+; CHECK-NEXT:    br label [[ENTRY:%.*]]
+; CHECK:       entry.tail:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s, i64 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @test_strncmp_2(ptr %s) {
+; CHECK-LABEL: define i1 @test_strncmp_2(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SUB:%.*]]
+; CHECK:       sub_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK:       sub_1:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
+; CHECK:       sub_2:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 0, [[TMP11]]
+; CHECK-NEXT:    br label [[NE]]
+; CHECK:       ne:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
+; CHECK-NEXT:    br label [[ENTRY:%.*]]
+; CHECK:       entry.tail:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP13]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s, i64 3)
+  %cmp = icmp slt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @test_strncmp_3(ptr %s) {
+; CHECK-LABEL: define i1 @test_strncmp_3(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SUB:%.*]]
+; CHECK:       sub_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK:       sub_1:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
+; CHECK:       sub_2:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 0, [[TMP11]]
+; CHECK-NEXT:    br label [[NE]]
+; CHECK:       ne:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
+; CHECK-NEXT:    br label [[ENTRY:%.*]]
+; CHECK:       entry.tail:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s, i64 4)
+  %cmp = icmp sgt i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @test_strcmp_1(ptr %s) {
+; CHECK-LABEL: define i1 @test_strcmp_1(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SUB:%.*]]
+; CHECK:       sub_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 97
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK:       sub_1:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    br label [[NE]]
+; CHECK:       ne:
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP6]], [[SUB1]] ]
+; CHECK-NEXT:    br label [[ENTRY:%.*]]
+; CHECK:       entry.tail:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(2) @s2)
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @test_strcmp_2(ptr %s) {
+; CHECK-LABEL: define i1 @test_strcmp_2(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SUB:%.*]]
+; CHECK:       sub_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 97
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK:       sub_1:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 [[TMP6]], 98
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
+; CHECK:       sub_2:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT:    br label [[NE]]
+; CHECK:       ne:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP11]], [[SUB2]] ]
+; CHECK-NEXT:    br label [[ENTRY:%.*]]
+; CHECK:       entry.tail:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[TMP12]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3)
+  %cmp = icmp sge i32 %call, 0
+  ret i1 %cmp
+}
+
+define i1 @test_strcmp_3(ptr %s) {
+; CHECK-LABEL: define i1 @test_strcmp_3(
+; CHECK-SAME: ptr [[S:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SUB:%.*]]
+; CHECK:       sub_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK:       sub_1:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
+; CHECK:       sub_2:
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 0, [[TMP11]]
+; CHECK-NEXT:    br label [[NE]]
+; CHECK:       ne:
+; CHECK-NEXT:    [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
+; CHECK-NEXT:    br label [[ENTRY:%.*]]
+; CHECK:       entry.tail:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sle i32 [[TMP13]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %call = tail call i32 @strcmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s)
+  %cmp = icmp sle i32 %call, 0
+  ret i1 %cmp
+}

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
new file mode 100644
index 00000000000000..0cc5e3f135b652
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
@@ -0,0 +1,147 @@
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck --check-prefixes=CHECK,TH-3 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=2 < %s | FileCheck --check-prefixes=CHECK,TH-2 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=1 < %s | FileCheck --check-prefixes=CHECK,TH-1 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=0 < %s | FileCheck --check-prefixes=CHECK,TH-0 %s
+
+declare i32 @strcmp(ptr nocapture, ptr nocapture)
+declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
+
+@s1 = constant [1 x i8] c"\00", align 1
+@s2n = constant [2 x i8] c"aa", align 1
+@s3 = constant [3 x i8] c"aa\00", align 1
+@s4 = constant [4 x i8] c"aab\00", align 1
+
+; strncmp(s, "aa", 1)
+define i1 @test_strncmp_0(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 1)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_0(
+; CHECK: @strncmp
+
+; strncmp(s, "aa", 2)
+define i1 @test_strncmp_1(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_1(
+; TH-3-NOT: @strncmp
+; TH-2-NOT: @strncmp
+; TH-1: @strncmp
+; TH-0: @strncmp
+
+define i1 @test_strncmp_1_dereferenceable(ptr dereferenceable(2) %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_1_dereferenceable(
+; CHECK: @strncmp
+
+define i32 @test_strncmp_1_not_comparision(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+  ret i32 %call
+}
+; CHECK-LABEL: @test_strncmp_1_not_comparision(
+; CHECK: @strncmp
+
+; strncmp(s, "aa", 3)
+define i1 @test_strncmp_2(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 3)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_2(
+; TH-3-NOT: @strncmp
+; TH-2: @strncmp
+; TH-1: @strncmp
+; TH-0: @strncmp
+
+; strncmp(s, "aab", 3)
+define i1 @test_strncmp_3(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 3)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_3(
+; TH-3-NOT: @strncmp
+
+; strncmp(s, "aab", 4)
+define i1 @test_strncmp_4(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 4)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_4(
+; TH-3: @strncmp
+
+; strncmp(s, "aa", 2)
+define i1 @test_strncmp_5(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_5(
+; TH-3-NOT: @strncmp
+
+; char s2[] = {'a', 'a'}
+; strncmp(s1, s2, 2)
+define i1 @test_strncmp_6(ptr %s1) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(2) @s2n, i64 2)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_6(
+; TH-3-NOT: @strncmp
+
+; char s2[] = {'a', 'a'}
+; strncmp(s, s2, 3)
+define i1 @test_strncmp_7(ptr %s) {
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(2) @s2n, i64 3)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_7(
+; CHECK: @strncmp
+
+; strcmp(s, "")
+define i1 @test_strcmp_0(ptr %s) {
+entry:
+  %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(1) @s1)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strcmp_0(
+; CHECK: @strcmp
+
+; strcmp(s, "aa")
+define i1 @test_strcmp_1(ptr %s) {
+entry:
+  %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strcmp_1(
+; TH-3-NOT: @strcmp
+
+; strcmp(s, "aab")
+define i1 @test_strcmp_2(ptr %s) {
+entry:
+  %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4)
+  %cmp = icmp eq i32 %call, 0
+  ret i1 %cmp
+}
+; CHECK-LABEL: @test_strcmp_2(
+; TH-3: @strcmp


        


More information about the llvm-commits mailing list