[llvm] [AggressiveInstCombine] Inline strcmp/strncmp (PR #89371)
Franklin Zhang via llvm-commits
llvm-commits at lists.llvm.org
Thu May 2 20:05:59 PDT 2024
https://github.com/FLZ101 updated https://github.com/llvm/llvm-project/pull/89371
>From 9726bd4c3ba06ac0451629f2f88b33f6e795a55a Mon Sep 17 00:00:00 2001
From: zhangfenglei <zhangfenglei at huawei.com>
Date: Fri, 19 Apr 2024 19:20:18 +0800
Subject: [PATCH 1/6] [AggressiveInstCombine] Inline strcmp/strncmp
Inline calls to strcmp(s1, s2) and strncmp(s1, s2, N),
where N and exactly one of s1 and s2 are constant.
---
.../AggressiveInstCombine.cpp | 258 +++++++++++++++++-
.../AggressiveInstCombine/strcmp.ll | 219 ---------------
.../AggressiveInstCombine/strncmp-1.ll | 203 ++++++++++++++
.../AggressiveInstCombine/strncmp-2.ll | 145 ++++++++++
4 files changed, 603 insertions(+), 222 deletions(-)
delete mode 100644 llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
create mode 100644 llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
create mode 100644 llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index e586e9eda1322f..eddd7382c27bbc 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -28,6 +29,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -922,6 +924,251 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
return true;
}
+static cl::opt<unsigned> StrNCmpInlineThreshold( // values below 2 disable the strcmp/strncmp inlining entirely
+ "strncmp-inline-threshold", cl::init(3), cl::Hidden,
+ cl::desc("The maximum length of a constant string for a builtin string cmp "
+ "call eligible for inlining. The default value is 3."));
+
+namespace {
+class StrNCmpInliner { // rewrites one strcmp/strncmp call into per-byte compare blocks
+public:
+ StrNCmpInliner(CallInst *CI, LibFunc Func, Function::iterator &BBNext,
+ DomTreeUpdater *DTU, const DataLayout &DL)
+ : CI(CI), Func(Func), BBNext(BBNext), DTU(DTU), DL(DL) {}
+
+ bool optimizeStrNCmp(); // true only if the call was replaced
+
+private:
+ bool inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Switched);
+
+ CallInst *CI; // the call under consideration; erased on a successful inline
+ LibFunc Func; // compared with LibFunc_strncmp to decide whether operand 2 is read
+ Function::iterator &BBNext; // caller's block cursor; repositioned after a successful inline
+ DomTreeUpdater *DTU; // receives the CFG edge updates; null-checked before use
+ const DataLayout &DL; // used for the dereferenceable-bytes query
+};
+
+} // namespace
+
+/// First we normalize calls to strncmp/strcmp to the form of
+/// compare(s1, s2, N), which means comparing the first N bytes of s1 and s2
+/// (treating '\0' as an ordinary byte).
+///
+/// Examples:
+///
+/// \code
+///   strncmp(s, "a", 3) -> compare(s, "a", 2)
+///   strncmp(s, "abc", 3) -> compare(s, "abc", 3)
+///   strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
+///   strcmp(s, "a") -> compare(s, "a", 2)
+///
+///   char s2[] = {'a'}
+///   strncmp(s, s2, 3) -> compare(s, s2, 3)
+///
+///   char s2[] = {'a', 'b', 'c', 'd'}
+///   strncmp(s, s2, 3) -> compare(s, s2, 3)
+/// \endcode
+///
+/// We only handle cases where N and exactly one of s1 and s2 are constant.
+/// Cases where s1 and s2 are both constant are already handled by the
+/// instcombine pass.
+///
+/// We do not handle cases where N > StrNCmpInlineThreshold, nor cases where
+/// N < 2, which are already handled by the instcombine pass.
+///
+/// \returns true if the call was replaced by inline comparisons.
+bool StrNCmpInliner::optimizeStrNCmp() {
+  if (StrNCmpInlineThreshold < 2)
+    return false;
+
+  Value *Str1P = CI->getArgOperand(0);
+  Value *Str2P = CI->getArgOperand(1);
+  // should be handled elsewhere
+  if (Str1P == Str2P)
+    return false;
+
+  StringRef Str1, Str2;
+  bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
+  bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
+  if (HasStr1 == HasStr2)
+    return false;
+
+  // note that '\0' and characters after it are not trimmed
+  StringRef Str = HasStr1 ? Str1 : Str2;
+
+  size_t Idx = Str.find('\0');
+  uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
+  if (Func == LibFunc_strncmp) {
+    auto *Len = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!Len)
+      return false;
+    N = std::min(N, Len->getZExtValue());
+  }
+  // now N means how many bytes we need to compare at most
+  if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
+    return false;
+
+  Value *StrP = HasStr1 ? Str2P : Str1P;
+
+  // cases where StrP has two or more dereferenceable bytes might be better
+  // optimized elsewhere
+  bool CanBeNull = false, CanBeFreed = false;
+  if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
+    return false;
+
+  return inlineCompare(StrP, Str, N, HasStr1);
+}
+
+/// Replace the call with per-byte comparisons, i.e. convert
+/// `ret = compare(s1, s2, N)` into
+///
+/// \code
+///   ret = (int)s1[0] - (int)s2[0]
+///   if (ret != 0)
+///     goto NE
+///   ...
+///   ret = (int)s1[N-2] - (int)s2[N-2]
+///   if (ret != 0)
+///     goto NE
+///   ret = (int)s1[N-1] - (int)s2[N-1]
+///   NE:
+/// \endcode
+///
+/// CFG before the transformation: BBCI. CFG after it:
+///
+/// BBBefore -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBCI
+///                 |                        ^
+///                 E                        |
+///                 |                        |
+///             BBSubs[1] (sub,icmp) --NE----+
+///                ...                       |
+///             BBSubs[N-1] (sub) -----------+
+///
+/// \p LHS is the non-constant string, \p RHS the constant bytes and \p N the
+/// number of bytes to compare. \p Switched is true when the constant was the
+/// call's first argument, so each difference is RHS - LHS. Always returns true.
+bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
+                                   bool Switched) {
+  IRBuilder<> B(CI->getContext());
+  // The new instructions stand in for the call; keep its debug location.
+  B.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  BasicBlock *BBCI = CI->getParent();
+  bool IsEntry = BBCI->isEntryBlock();
+  BasicBlock *BBBefore = splitBlockBefore(BBCI, CI, DTU, nullptr, nullptr,
+                                          BBCI->getName() + ".before");
+
+  // BBSubs[0..N-1] hold the per-byte compares; BBSubs[N] is the join block.
+  SmallVector<BasicBlock *> BBSubs;
+  for (uint64_t i = 0; i < N + 1; ++i)
+    BBSubs.push_back(
+        BasicBlock::Create(CI->getContext(), "sub", BBCI->getParent(), BBCI));
+  BasicBlock *BBNE = BBSubs[N];
+
+  cast<BranchInst>(BBBefore->getTerminator())->setSuccessor(0, BBSubs[0]);
+
+  B.SetInsertPoint(BBNE);
+  PHINode *Phi = B.CreatePHI(CI->getType(), N);
+  B.CreateBr(BBCI);
+
+  Value *Base = LHS;
+  for (uint64_t i = 0; i < N; ++i) {
+    B.SetInsertPoint(BBSubs[i]);
+    Value *VL = B.CreateZExt(
+        B.CreateLoad(B.getInt8Ty(),
+                     B.CreateInBoundsGEP(B.getInt8Ty(), Base, B.getInt64(i))),
+        CI->getType());
+    // StringRef yields plain char, which may be signed; go through unsigned
+    // char so bytes >= 0x80 match the zero-extended load above.
+    Value *VR =
+        ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
+    Value *Sub = Switched ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
+    if (i < N - 1)
+      B.CreateCondBr(B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)),
+                     BBNE, BBSubs[i + 1]);
+    else
+      B.CreateBr(BBNE);
+
+    Phi->addIncoming(Sub, BBSubs[i]);
+  }
+
+  CI->replaceAllUsesWith(Phi);
+  CI->eraseFromParent();
+
+  BBNext = BBCI->getIterator();
+
+  // Update DomTree
+  if (DTU) {
+    if (IsEntry) {
+      DTU->recalculate(*BBCI->getParent());
+    } else {
+      SmallVector<DominatorTree::UpdateType, 8> Updates;
+      Updates.push_back({DominatorTree::Delete, BBBefore, BBCI});
+      Updates.push_back({DominatorTree::Insert, BBBefore, BBSubs[0]});
+      for (uint64_t i = 0; i < N; ++i) {
+        if (i < N - 1)
+          Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
+        Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
+      }
+      Updates.push_back({DominatorTree::Insert, BBNE, BBCI});
+      DTU->applyUpdates(Updates);
+    }
+  }
+  return true;
+}
+
+static bool inlineLibCalls(Function &F, TargetLibraryInfo &TLI,
+ const TargetTransformInfo &TTI, DominatorTree &DT, // TTI is not referenced in this body
+ bool &MadeCFGChange) { // scans F for strcmp/strncmp calls to inline; returns whether any was inlined
+ MadeCFGChange = false;
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ bool MadeChange = false;
+
+ Function::iterator CurrBB;
+ for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
+ CurrBB = BB++; // BB is the resume point; a successful inline overwrites it
+
+ for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
+ II != IE; ++II) {
+ CallInst *Call = dyn_cast<CallInst>(&*II);
+ Function *CalledFunc;
+
+ if (!Call || !(CalledFunc = Call->getCalledFunction()))
+ continue;
+
+ if (Call->isNoBuiltin())
+ continue;
+
+ // Skip if function either has local linkage or is not a known library
+ // function.
+ LibFunc LF;
+ if (CalledFunc->hasLocalLinkage() || !TLI.getLibFunc(*CalledFunc, LF) ||
+ !TLI.has(LF))
+ continue;
+
+ switch (LF) {
+ case LibFunc_strcmp:
+ case LibFunc_strncmp: {
+ auto &DL = F.getParent()->getDataLayout();
+ if (StrNCmpInliner(Call, LF, BB, &DTU, DL).optimizeStrNCmp()) {
+ MadeCFGChange = true;
+ break; // leaves the switch only; control reaches MadeChange below
+ }
+ continue; // not inlined: go on to the next instruction
+ }
+ default:
+ continue;
+ }
+
+ MadeChange = true;
+ break; // the call was erased and its block split, so stop scanning CurrBB
+ }
+ }
+
+ return MadeChange;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
@@ -969,11 +1216,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, DominatorTree &DT,
- AliasAnalysis &AA) {
+ AliasAnalysis &AA, bool &MadeCFGChange) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
+ MadeChange |= inlineLibCalls(F, TLI, TTI, DT, MadeCFGChange);
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
return MadeChange;
}
@@ -985,12 +1233,16 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
+ bool MadeCFGChange = false;
+ if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
// Mark all the analyses that instcombine updates as preserved.
PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
+ if (MadeCFGChange)
+ PA.preserve<DominatorTreeAnalysis>();
+ else
+ PA.preserveSet<CFGAnalyses>();
return PA;
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strcmp.ll b/llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
deleted file mode 100644
index 99dd450e6f44e6..00000000000000
--- a/llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
+++ /dev/null
@@ -1,219 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
-
-declare i32 @strcmp(ptr, ptr)
-
- at s0 = constant [1 x i8] c"\00"
- at s1 = constant [2 x i8] c"0\00"
- at s2 = constant [3 x i8] c"01\00"
- at s3 = constant [4 x i8] c"012\00"
- at s4 = constant [5 x i8] c"0123\00"
-
-; Expand strcmp(C, "x"), strcmp(C, "xy").
-
-define i1 @expand_strcmp_s0(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s0(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s0)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s0)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_eq_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_eq_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_eq_s1_commuted(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_eq_s1_commuted(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr @s1, ptr [[C:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr @s1, ptr %C)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_ne_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_ne_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sgt_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sgt_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sge_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sge_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp sge i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_slt_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_slt_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sle_s1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sle_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp sle i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s1_fail_1(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s1_fail_1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 1
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- %cmp = icmp eq i32 %call, 1
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s1_fail_2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s1_fail_2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr @s1, ptr @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr @s1, ptr @s1)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i32 @expand_strcmp_s1_fail_3(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s1_fail_3(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s1)
-; CHECK-NEXT: ret i32 [[CALL]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s1)
- ret i32 %call
-}
-
-define i1 @expand_strcmp_eq_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_eq_s2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s2)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_ne_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_ne_s2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s2)
- %cmp = icmp ne i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sgt_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sgt_s2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s2)
- %cmp = icmp sgt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sge_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sge_s2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s2)
- %cmp = icmp sge i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_slt_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_slt_s2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s2)
- %cmp = icmp slt i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_sle_s2(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_sle_s2(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s2)
- %cmp = icmp sle i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s3(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s3(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s3)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s3)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
-
-define i1 @expand_strcmp_s4(ptr %C) {
-; CHECK-LABEL: @expand_strcmp_s4(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s4)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
-;
- %call = call i32 @strcmp(ptr %C, ptr @s4)
- %cmp = icmp eq i32 %call, 0
- ret i1 %cmp
-}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
new file mode 100644
index 00000000000000..4679d6d7fca143
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
@@ -0,0 +1,203 @@
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
+
+declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
+declare i32 @strcmp(ptr nocapture, ptr nocapture)
+
+ at .str = private unnamed_addr constant [3 x i8] c"ab\00", align 1
+ at .str.1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
+
+define i32 @test_strncmp_1(ptr nocapture readonly %s) {
+; CHECK-LABEL: @test_strncmp_1(
+; CHECK-NEXT: entry.before:
+; CHECK-NEXT: br label [[SUB:%.*]]
+; CHECK: sub:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SUB2:%.*]], label [[SUB1:%.*]]
+; CHECK: sub1:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
+; CHECK-NEXT: br label [[SUB2]]
+; CHECK: sub2:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ]
+; CHECK-NEXT: br label [[ENTRY:%.*]]
+; CHECK: entry:
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s, i64 2)
+ ret i32 %call
+}
+
+define i32 @test_strncmp_2(ptr nocapture readonly %s) {
+; CHECK-LABEL: @test_strncmp_2(
+; CHECK-NEXT: entry.before:
+; CHECK-NEXT: br label [[SUB:%.*]]
+; CHECK: sub:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK: sub1:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK: sub2:
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
+; CHECK-NEXT: br label [[SUB3]]
+; CHECK: sub3:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: br label [[ENTRY:%.*]]
+; CHECK: entry:
+; CHECK-NEXT: ret i32 [[TMP14]]
+;
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s, i64 3)
+ ret i32 %call
+}
+
+define i32 @test_strncmp_3(ptr nocapture readonly %s) {
+; CHECK-LABEL: @test_strncmp_3(
+; CHECK-NEXT: entry.before:
+; CHECK-NEXT: br label [[SUB:%.*]]
+; CHECK: sub:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK: sub1:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK: sub2:
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
+; CHECK-NEXT: br label [[SUB3]]
+; CHECK: sub3:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: br label [[ENTRY:%.*]]
+; CHECK: entry:
+; CHECK-NEXT: ret i32 [[TMP14]]
+;
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s, i64 4)
+ ret i32 %call
+}
+
+define i32 @test_strcmp_1(ptr nocapture readonly %s) {
+; CHECK-LABEL: @test_strcmp_1(
+; CHECK-NEXT: entry.before:
+; CHECK-NEXT: br label [[SUB:%.*]]
+; CHECK: sub:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 97
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SUB2:%.*]], label [[SUB1:%.*]]
+; CHECK: sub1:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 0
+; CHECK-NEXT: br label [[SUB2]]
+; CHECK: sub2:
+; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ]
+; CHECK-NEXT: br label [[ENTRY:%.*]]
+; CHECK: entry:
+; CHECK-NEXT: ret i32 [[TMP9]]
+;
+entry:
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(2) @.str.1)
+ ret i32 %call
+}
+
+define i32 @test_strcmp_2(ptr nocapture readonly %s) {
+; CHECK-LABEL: @test_strcmp_2(
+; CHECK-NEXT: entry.before:
+; CHECK-NEXT: br label [[SUB:%.*]]
+; CHECK: sub:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 97
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK: sub1:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 98
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK: sub2:
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 0
+; CHECK-NEXT: br label [[SUB3]]
+; CHECK: sub3:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: br label [[ENTRY:%.*]]
+; CHECK: entry:
+; CHECK-NEXT: ret i32 [[TMP14]]
+;
+entry:
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str)
+ ret i32 %call
+}
+
+define i32 @test_strcmp_3(ptr nocapture readonly %s) {
+; CHECK-LABEL: @test_strcmp_3(
+; CHECK-NEXT: entry.before:
+; CHECK-NEXT: br label [[SUB:%.*]]
+; CHECK: sub:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK: sub1:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK: sub2:
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
+; CHECK-NEXT: br label [[SUB3]]
+; CHECK: sub3:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: br label [[ENTRY:%.*]]
+; CHECK: entry:
+; CHECK-NEXT: ret i32 [[TMP14]]
+;
+entry:
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s)
+ ret i32 %call
+}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
new file mode 100644
index 00000000000000..17dd9d39d17408
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
@@ -0,0 +1,145 @@
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
+
+declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
+declare i32 @strcmp(ptr nocapture, ptr nocapture)
+
+ at .str = private unnamed_addr constant [3 x i8] c"aa\00", align 1
+ at .str.1 = private unnamed_addr constant [4 x i8] c"aab\00", align 1
+ at __const.test_strncmp_8.s2 = private unnamed_addr constant [2 x i8] c"aa", align 1
+
+; int test_strncmp_1(const char *s) {
+; if (!strncmp(s, "aa", 2))
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_1(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str, i64 2)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_1(
+; CHECK-NOT: @strncmp
+
+; int test_strncmp_2(const char *s) {
+; if (!strncmp(s, "aa", 3))
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_2(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str, i64 3)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_2(
+; CHECK-NOT: @strncmp
+
+; int test_strncmp_3(const char *s) {
+; if (!strncmp(s, "aab", 3))
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_3(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @.str.1, i64 3)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_3(
+; CHECK-NOT: @strncmp
+
+; int test_strncmp_4(const char *s) {
+; if (!strncmp(s, "aab", 4))
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_4(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @.str.1, i64 4)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_4(
+; CHECK: @strncmp
+
+; int test_strncmp_5(const char *s) {
+; if (strncmp(s, "aa", 2) < 0)
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_5(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str, i64 2)
+ %cmp = icmp slt i32 %call, 0
+ %retval.0 = select i1 %cmp, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_5(
+; CHECK-NOT: @strncmp
+
+; int test_strncmp_6(const char *s1) {
+; char s2[] = {'a', 'a'};
+; if (strncmp(s1, s2, 2) < 0)
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_6(i8* nocapture readonly %s1) { ; the constant operand is a 2-byte global, so it is dereferenceable(2)
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(2) @__const.test_strncmp_8.s2, i64 2)
+  %cmp = icmp slt i32 %call, 0
+  %retval.0 = select i1 %cmp, i32 11, i32 41
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_6(
+; CHECK-NOT: @strncmp
+
+; int test_strncmp_7(const char *s1) {
+; char s2[] = {'a', 'a'};
+; if (strncmp(s1, s2, 3) < 0)
+; return 11;
+; return 41;
+; }
+define i32 @test_strncmp_7(i8* nocapture readonly %s1) { ; the constant operand is a 2-byte global, so it is dereferenceable(2)
+entry:
+  %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(2) @__const.test_strncmp_8.s2, i64 3)
+  %cmp = icmp slt i32 %call, 0
+  %retval.0 = select i1 %cmp, i32 11, i32 41
+  ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_7(
+; CHECK: @strncmp
+
+; int test_strcmp_1(const char *s) {
+; if (!strcmp(s, "aa"))
+; return 11;
+; return 41;
+; }
+define i32 @test_strcmp_1(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strcmp_1(
+; CHECK-NOT: @strcmp
+
+; int test_strcmp_2(const char *s) {
+; if (!strcmp(s, "aab"))
+; return 11;
+; return 41;
+; }
+define i32 @test_strcmp_2(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @.str.1)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strcmp_2(
+; CHECK: @strcmp
>From e2d35214107379b64dd009571ed159553aebb3dd Mon Sep 17 00:00:00 2001
From: zhangfenglei <zhangfenglei at huawei.com>
Date: Sat, 20 Apr 2024 17:39:42 +0800
Subject: [PATCH 2/6] [AggressiveInstCombine] Inline strcmp/strncmp
* add missing test cases
* remove redundant code
---
.../AggressiveInstCombine.cpp | 46 +++++-------
.../AggressiveInstCombine/strncmp-1.ll | 75 ++++++++++---------
.../AggressiveInstCombine/strncmp-2.ll | 20 +++++
3 files changed, 78 insertions(+), 63 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index eddd7382c27bbc..011892b1110340 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1055,18 +1055,17 @@ bool StrNCmpInliner::optimizeStrNCmp() {
///
bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
bool Switched) {
- IRBuilder<> B(CI->getContext());
+ auto &Ctx = CI->getContext();
+ IRBuilder<> B(Ctx);
BasicBlock *BBCI = CI->getParent();
- bool IsEntry = BBCI->isEntryBlock();
BasicBlock *BBBefore = splitBlockBefore(BBCI, CI, DTU, nullptr, nullptr,
BBCI->getName() + ".before");
SmallVector<BasicBlock *> BBSubs;
- for (uint64_t i = 0; i < N + 1; ++i)
- BBSubs.push_back(
- BasicBlock::Create(CI->getContext(), "sub", BBCI->getParent(), BBCI));
- BasicBlock *BBNE = BBSubs[N];
+ for (uint64_t i = 0; i < N; ++i)
+ BBSubs.push_back(BasicBlock::Create(Ctx, "sub", BBCI->getParent(), BBCI));
+ BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBCI);
cast<BranchInst>(BBBefore->getTerminator())->setSuccessor(0, BBSubs[0]);
@@ -1099,27 +1098,23 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
// Update DomTree
if (DTU) {
- if (IsEntry) {
- DTU->recalculate(*BBCI->getParent());
- } else {
- SmallVector<DominatorTree::UpdateType, 8> Updates;
- Updates.push_back({DominatorTree::Delete, BBBefore, BBCI});
- Updates.push_back({DominatorTree::Insert, BBBefore, BBSubs[0]});
- for (uint64_t i = 0; i < N; ++i) {
- if (i < N - 1)
- Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
- Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
- }
- Updates.push_back({DominatorTree::Insert, BBNE, BBCI});
- DTU->applyUpdates(Updates);
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
+ Updates.push_back({DominatorTree::Delete, BBBefore, BBCI});
+ Updates.push_back({DominatorTree::Insert, BBBefore, BBSubs[0]});
+ for (uint64_t i = 0; i < N; ++i) {
+ if (i < N - 1)
+ Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
+ Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
}
+ Updates.push_back({DominatorTree::Insert, BBNE, BBCI});
+ DTU->applyUpdates(Updates);
}
return true;
}
static bool inlineLibCalls(Function &F, TargetLibraryInfo &TLI,
const TargetTransformInfo &TTI, DominatorTree &DT,
- bool &MadeCFGChange) {
+ const DataLayout &DL, bool &MadeCFGChange) {
MadeCFGChange = false;
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
@@ -1137,20 +1132,13 @@ static bool inlineLibCalls(Function &F, TargetLibraryInfo &TLI,
if (!Call || !(CalledFunc = Call->getCalledFunction()))
continue;
- if (Call->isNoBuiltin())
- continue;
-
- // Skip if function either has local linkage or is not a known library
- // function.
LibFunc LF;
- if (CalledFunc->hasLocalLinkage() || !TLI.getLibFunc(*CalledFunc, LF) ||
- !TLI.has(LF))
+ if (!TLI.getLibFunc(*CalledFunc, LF))
continue;
switch (LF) {
case LibFunc_strcmp:
case LibFunc_strncmp: {
- auto &DL = F.getParent()->getDataLayout();
if (StrNCmpInliner(Call, LF, BB, &DTU, DL).optimizeStrNCmp()) {
MadeCFGChange = true;
break;
@@ -1221,7 +1209,7 @@ static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= inlineLibCalls(F, TLI, TTI, DT, MadeCFGChange);
+ MadeChange |= inlineLibCalls(F, TLI, TTI, DT, DL, MadeCFGChange);
MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
return MadeChange;
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
index 4679d6d7fca143..16f6d9c25c1c65 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
@@ -7,23 +8,24 @@ declare i32 @strcmp(ptr nocapture, ptr nocapture)
@.str.1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
define i32 @test_strncmp_1(ptr nocapture readonly %s) {
-; CHECK-LABEL: @test_strncmp_1(
+; CHECK-LABEL: define i32 @test_strncmp_1(
+; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SUB2:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
-; CHECK-NEXT: br label [[SUB2]]
-; CHECK: sub2:
+; CHECK-NEXT: br label [[NE]]
+; CHECK: ne:
; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
@@ -35,30 +37,31 @@ entry:
}
define i32 @test_strncmp_2(ptr nocapture readonly %s) {
-; CHECK-LABEL: @test_strncmp_2(
+; CHECK-LABEL: define i32 @test_strncmp_2(
+; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
-; CHECK-NEXT: br label [[SUB3]]
-; CHECK: sub3:
+; CHECK-NEXT: br label [[NE]]
+; CHECK: ne:
; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
@@ -70,30 +73,31 @@ entry:
}
define i32 @test_strncmp_3(ptr nocapture readonly %s) {
-; CHECK-LABEL: @test_strncmp_3(
+; CHECK-LABEL: define i32 @test_strncmp_3(
+; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
-; CHECK-NEXT: br label [[SUB3]]
-; CHECK: sub3:
+; CHECK-NEXT: br label [[NE]]
+; CHECK: ne:
; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
@@ -105,23 +109,24 @@ entry:
}
define i32 @test_strcmp_1(ptr nocapture readonly %s) {
-; CHECK-LABEL: @test_strcmp_1(
+; CHECK-LABEL: define i32 @test_strcmp_1(
+; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 97
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SUB2:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 0
-; CHECK-NEXT: br label [[SUB2]]
-; CHECK: sub2:
+; CHECK-NEXT: br label [[NE]]
+; CHECK: ne:
; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
@@ -133,30 +138,31 @@ entry:
}
define i32 @test_strcmp_2(ptr nocapture readonly %s) {
-; CHECK-LABEL: @test_strcmp_2(
+; CHECK-LABEL: define i32 @test_strcmp_2(
+; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 97
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 98
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 0
-; CHECK-NEXT: br label [[SUB3]]
-; CHECK: sub3:
+; CHECK-NEXT: br label [[NE]]
+; CHECK: ne:
; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
@@ -168,30 +174,31 @@ entry:
}
define i32 @test_strcmp_3(ptr nocapture readonly %s) {
-; CHECK-LABEL: @test_strcmp_3(
+; CHECK-LABEL: define i32 @test_strcmp_3(
+; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[SUB3:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[SUB3]], label [[SUB2:%.*]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
-; CHECK-NEXT: br label [[SUB3]]
-; CHECK: sub3:
+; CHECK-NEXT: br label [[NE]]
+; CHECK: ne:
; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
index 17dd9d39d17408..15d5e17a86cf03 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
@@ -22,6 +22,16 @@ entry:
; CHECK-LABEL: @test_strncmp_1(
; CHECK-NOT: @strncmp
+define i32 @test_strncmp_1_dereferenceable(i8* nocapture readonly dereferenceable(2) %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @.str, i64 2)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strncmp_1_dereferenceable(
+; CHECK: @strncmp
+
; int test_strncmp_2(const char *s) {
; if (!strncmp(s, "aa", 3))
; return 11;
@@ -129,6 +139,16 @@ entry:
; CHECK-LABEL: @test_strcmp_1(
; CHECK-NOT: @strcmp
+define i32 @test_strcmp_1_dereferenceable(i8* nocapture readonly dereferenceable(2) %s) {
+entry:
+ %call = tail call i32 @strcmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @.str)
+ %tobool.not = icmp eq i32 %call, 0
+ %retval.0 = select i1 %tobool.not, i32 11, i32 41
+ ret i32 %retval.0
+}
+; CHECK-LABEL: @test_strcmp_1_dereferenceable(
+; CHECK: @strcmp
+
; int test_strcmp_2(const char *s) {
; if (!strcmp(s, "aab"))
; return 11;
>From d5769df89b866c62443bd68c5709406df925f124 Mon Sep 17 00:00:00 2001
From: zhangfenglei <zhangfenglei at huawei.com>
Date: Wed, 1 May 2024 11:46:40 +0800
Subject: [PATCH 3/6] [AggressiveInstCombine] Inline strcmp/strncmp
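* add isOnlyUsedInZeroComparison to ValueTracking and only inline calls
  whose result is only used in comparisons with zero
* more tests
* replace inlineLibCalls with foldLibCalls, which also dispatches sqrt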
---
llvm/include/llvm/Analysis/ValueTracking.h | 2 +
.../AggressiveInstCombine.h | 2 +-
llvm/lib/Analysis/ValueTracking.cpp | 7 +
.../AggressiveInstCombine.cpp | 153 +++++------
.../AggressiveInstCombine/strncmp-1.ll | 250 +++++++++---------
.../AggressiveInstCombine/strncmp-2.ll | 218 ++++++++-------
6 files changed, 313 insertions(+), 319 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index e1c41b3b55ccfb..f494fd39aaa639 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -116,6 +116,8 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
const DominatorTree *DT = nullptr,
bool UseInstrInfo = true);
+bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
+
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
/// Return true if the given value is known to be non-zero when defined. For
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 2d76546316fafb..3568417510f107 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -8,7 +8,7 @@
/// \file
///
/// AggressiveInstCombiner - Combine expression patterns to form expressions
-/// with fewer, simple instructions. This pass does not modify the CFG.
+/// with fewer, simple instructions.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index b87bba7c20e0d6..399d57ce737c41 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -252,6 +252,13 @@ bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
RHSCache.getKnownBits(SQ));
}
+bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
+ return !I->user_empty() && all_of(I->users(), [](const User *U) {
+ ICmpInst::Predicate P;
+ return match(U, m_ICmp(P, m_Value(), m_Zero()));
+ });
+}
+
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
return !I->user_empty() && all_of(I->users(), [](const User *U) {
ICmpInst::Predicate P;
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 011892b1110340..3349607a299ed5 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -75,7 +75,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
m_LShr(m_Value(ShVal1),
m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
- return Intrinsic::fshl;
+ return Intrinsic::fshl;
}
// fshr(ShVal0, ShVal1, ShAmt)
@@ -84,7 +84,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
m_Value(ShAmt))),
m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
- return Intrinsic::fshr;
+ return Intrinsic::fshr;
}
return Intrinsic::not_intrinsic;
@@ -401,21 +401,11 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
-static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree &DT) {
- // Match a call to sqrt mathlib function.
- auto *Call = dyn_cast<CallInst>(&I);
- if (!Call)
- return false;
Module *M = Call->getModule();
- LibFunc Func;
- if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
- return false;
-
- if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
- return false;
// If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
// (because NNAN or the operand arg must not be less than -0.0) and (2) we
@@ -428,18 +418,18 @@ static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
if (TTI.haveFastSqrt(Ty) &&
(Call->hasNoNaNs() ||
cannotBeOrderedLessThanZero(
- Arg, 0, SimplifyQuery(M->getDataLayout(), &TLI, &DT, &AC, &I)))) {
- IRBuilder<> Builder(&I);
+ Arg, 0, SimplifyQuery(M->getDataLayout(), &TLI, &DT, &AC, Call)))) {
+ IRBuilder<> Builder(Call);
IRBuilderBase::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(Call->getFastMathFlags());
Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
- I.replaceAllUsesWith(NewSqrt);
+ Call->replaceAllUsesWith(NewSqrt);
// Explicitly erase the old call because a call with side effects is not
// trivially dead.
- I.eraseFromParent();
+ Call->eraseFromParent();
return true;
}
@@ -932,18 +922,17 @@ static cl::opt<unsigned> StrNCmpInlineThreshold(
namespace {
class StrNCmpInliner {
public:
- StrNCmpInliner(CallInst *CI, LibFunc Func, Function::iterator &BBNext,
- DomTreeUpdater *DTU, const DataLayout &DL)
- : CI(CI), Func(Func), BBNext(BBNext), DTU(DTU), DL(DL) {}
+ StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
+ const DataLayout &DL)
+ : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
bool optimizeStrNCmp();
private:
- bool inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Switched);
+ bool inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
CallInst *CI;
LibFunc Func;
- Function::iterator &BBNext;
DomTreeUpdater *DTU;
const DataLayout &DL;
};
@@ -952,7 +941,7 @@ class StrNCmpInliner {
/// First we normalize calls to strncmp/strcmp to the form of
/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
-/// (without considering '\0')
+/// (without considering '\0').
///
/// Examples:
///
@@ -969,49 +958,53 @@ class StrNCmpInliner {
/// strncmp(s, s2, 3) -> compare(s, s2, 3)
/// \endcode
///
-/// We only handle cases that N and exactly one of s1 and s2 are constant. Cases
-/// that s1 and s2 are both constant are already handled by the instcombine
-/// pass.
+/// We only handle cases where N and exactly one of s1 and s2 are constant.
+/// Cases where s1 and s2 are both constant are already handled by the
+/// instcombine pass.
///
-/// We do not handle cases that N > StrNCmpInlineThreshold.
+/// We do not handle cases where N > StrNCmpInlineThreshold.
///
-/// We also do not handles cases that N < 2, which are already
+/// We also do not handle cases where N < 2, which are already
/// handled by the instcombine pass.
///
bool StrNCmpInliner::optimizeStrNCmp() {
if (StrNCmpInlineThreshold < 2)
return false;
+ if (!isOnlyUsedInZeroComparison(CI))
+ return false;
+
Value *Str1P = CI->getArgOperand(0);
Value *Str2P = CI->getArgOperand(1);
- // should be handled elsewhere
+ // Should be handled elsewhere.
if (Str1P == Str2P)
return false;
StringRef Str1, Str2;
bool HasStr1 = getConstantStringInfo(Str1P, Str1, false);
bool HasStr2 = getConstantStringInfo(Str2P, Str2, false);
- if (!(HasStr1 ^ HasStr2))
+ if (HasStr1 == HasStr2)
return false;
- // note that '\0' and characters after it are not trimmed
+ // Note that '\0' and characters after it are not trimmed.
StringRef Str = HasStr1 ? Str1 : Str2;
size_t Idx = Str.find('\0');
uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
if (Func == LibFunc_strncmp) {
- if (!isa<ConstantInt>(CI->getArgOperand(2)))
+ if (auto ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ N = std::min(N, ConstInt->getZExtValue());
+ else
return false;
- N = std::min(N, cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue());
}
- // now N means how many bytes we need to compare at most
+ // Now N means how many bytes we need to compare at most.
if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
return false;
Value *StrP = HasStr1 ? Str2P : Str1P;
- // cases that StrP has two or more dereferenceable bytes might be better
- // optimized elsewhere
+ // Cases where StrP has two or more dereferenceable bytes might be better
+ // optimized elsewhere.
bool CanBeNull = false, CanBeFreed = false;
if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
return false;
@@ -1054,7 +1047,7 @@ bool StrNCmpInliner::optimizeStrNCmp() {
/// BBSubs[N-1] (sub) ---------+
///
bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
- bool Switched) {
+ bool Swapped) {
auto &Ctx = CI->getContext();
IRBuilder<> B(Ctx);
@@ -1076,12 +1069,12 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
Value *Base = LHS;
for (uint64_t i = 0; i < N; ++i) {
B.SetInsertPoint(BBSubs[i]);
- Value *VL = B.CreateZExt(
- B.CreateLoad(B.getInt8Ty(),
- B.CreateInBoundsGEP(B.getInt8Ty(), Base, B.getInt64(i))),
- CI->getType());
+ Value *VL =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
+ B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
+ CI->getType());
Value *VR = ConstantInt::get(CI->getType(), RHS[i]);
- Value *Sub = Switched ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
+ Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
if (i < N - 1)
B.CreateCondBr(B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)),
BBNE, BBSubs[i + 1]);
@@ -1094,12 +1087,8 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
CI->replaceAllUsesWith(Phi);
CI->eraseFromParent();
- BBNext = BBCI->getIterator();
-
- // Update DomTree
if (DTU) {
SmallVector<DominatorTree::UpdateType, 8> Updates;
- Updates.push_back({DominatorTree::Delete, BBBefore, BBCI});
Updates.push_back({DominatorTree::Insert, BBBefore, BBSubs[0]});
for (uint64_t i = 0; i < N; ++i) {
if (i < N - 1)
@@ -1107,54 +1096,47 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
}
Updates.push_back({DominatorTree::Insert, BBNE, BBCI});
+ Updates.push_back({DominatorTree::Delete, BBBefore, BBCI});
DTU->applyUpdates(Updates);
}
return true;
}
-static bool inlineLibCalls(Function &F, TargetLibraryInfo &TLI,
- const TargetTransformInfo &TTI, DominatorTree &DT,
- const DataLayout &DL, bool &MadeCFGChange) {
- MadeCFGChange = false;
- DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- bool MadeChange = false;
-
- Function::iterator CurrBB;
- for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
- CurrBB = BB++;
+static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, llvm::AssumptionCache &AC,
+ DominatorTree &DT, const DataLayout &DL,
+ bool &MadeCFGChange) {
- for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
- II != IE; ++II) {
- CallInst *Call = dyn_cast<CallInst>(&*II);
- Function *CalledFunc;
+ auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->isNoBuiltin())
+ return false;
- if (!Call || !(CalledFunc = Call->getCalledFunction()))
- continue;
+ Function *CalledFunc = CI->getCalledFunction();
+ if (!CalledFunc)
+ return false;
- LibFunc LF;
- if (!TLI.getLibFunc(*CalledFunc, LF))
- continue;
+ LibFunc LF;
+ if (!TLI.getLibFunc(*CalledFunc, LF) ||
+ !isLibFuncEmittable(CI->getModule(), &TLI, LF))
+ return false;
- switch (LF) {
- case LibFunc_strcmp:
- case LibFunc_strncmp: {
- if (StrNCmpInliner(Call, LF, BB, &DTU, DL).optimizeStrNCmp()) {
- MadeCFGChange = true;
- break;
- }
- continue;
- }
- default:
- continue;
- }
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
- MadeChange = true;
- break;
+ switch (LF) {
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ return foldSqrt(CI, LF, TTI, TLI, AC, DT);
+ case LibFunc_strcmp:
+ case LibFunc_strncmp:
+ if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
+ MadeCFGChange = true;
+ return true;
}
+ break;
+ default:;
}
-
- return MadeChange;
+ return false;
}
/// This is the entry point for folds that could be implemented in regular
@@ -1163,7 +1145,7 @@ static bool inlineLibCalls(Function &F, TargetLibraryInfo &TLI,
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, AliasAnalysis &AA,
- AssumptionCache &AC) {
+ AssumptionCache &AC, bool &MadeCFGChange) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -1188,7 +1170,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
- MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
+ MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
}
}
@@ -1209,8 +1191,7 @@ static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= inlineLibCalls(F, TLI, TTI, DT, DL, MadeCFGChange);
- MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
return MadeChange;
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
index 16f6d9c25c1c65..3d511483a7beb1 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
@@ -1,210 +1,216 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
+; check whether we generate the right IR
+
declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
declare i32 @strcmp(ptr nocapture, ptr nocapture)
- at .str = private unnamed_addr constant [3 x i8] c"ab\00", align 1
- at .str.1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
+ at s2 = constant [2 x i8] c"a\00"
+ at s3 = constant [3 x i8] c"ab\00"
-define i32 @test_strncmp_1(ptr nocapture readonly %s) {
-; CHECK-LABEL: define i32 @test_strncmp_1(
+define i1 @test_strncmp_1(ptr nocapture readonly %s) {
+; CHECK-LABEL: define i1 @test_strncmp_1(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
-; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
-; CHECK-NEXT: ret i32 [[TMP9]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s, i64 2)
- ret i32 %call
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
-define i32 @test_strncmp_2(ptr nocapture readonly %s) {
-; CHECK-LABEL: define i32 @test_strncmp_2(
+define i1 @test_strncmp_2(ptr nocapture readonly %s) {
+; CHECK-LABEL: define i1 @test_strncmp_2(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
-; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
-; CHECK-NEXT: ret i32 [[TMP14]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s, i64 3)
- ret i32 %call
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s, i64 3)
+ %cmp = icmp slt i32 %call, 0
+ ret i1 %cmp
}
-define i32 @test_strncmp_3(ptr nocapture readonly %s) {
-; CHECK-LABEL: define i32 @test_strncmp_3(
+define i1 @test_strncmp_3(ptr nocapture readonly %s) {
+; CHECK-LABEL: define i1 @test_strncmp_3(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
-; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
-; CHECK-NEXT: ret i32 [[TMP14]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s, i64 4)
- ret i32 %call
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s, i64 4)
+ %cmp = icmp sgt i32 %call, 0
+ ret i1 %cmp
}
-define i32 @test_strcmp_1(ptr nocapture readonly %s) {
-; CHECK-LABEL: define i32 @test_strcmp_1(
+define i1 @test_strcmp_1(ptr nocapture readonly %s) {
+; CHECK-LABEL: define i1 @test_strcmp_1(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 97
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 97
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
-; CHECK-NEXT: [[TMP9:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP6]], [[SUB1]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
-; CHECK-NEXT: ret i32 [[TMP9]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
- %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(2) @.str.1)
- ret i32 %call
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(2) @s2)
+ %cmp = icmp ne i32 %call, 0
+ ret i1 %cmp
}
-define i32 @test_strcmp_2(ptr nocapture readonly %s) {
-; CHECK-LABEL: define i32 @test_strcmp_2(
+define i1 @test_strcmp_2(ptr nocapture readonly %s) {
+; CHECK-LABEL: define i1 @test_strcmp_2(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP2]], 97
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 97
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 98
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 98
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
-; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP11]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
-; CHECK-NEXT: ret i32 [[TMP14]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[TMP12]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
- %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str)
- ret i32 %call
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3)
+ %cmp = icmp sge i32 %call, 0
+ ret i1 %cmp
}
-define i32 @test_strcmp_3(ptr nocapture readonly %s) {
-; CHECK-LABEL: define i32 @test_strcmp_3(
+define i1 @test_strcmp_3(ptr nocapture readonly %s) {
+; CHECK-LABEL: define i1 @test_strcmp_3(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
; CHECK-NEXT: entry.before:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub:
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1
-; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 97, [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[NE:%.*]], label [[SUB1:%.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
; CHECK: sub1:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 98, [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[NE]], label [[SUB2:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
; CHECK: sub2:
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1
-; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
-; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
+; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
; CHECK-NEXT: br label [[NE]]
; CHECK: ne:
-; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP3]], [[SUB]] ], [ [[TMP8]], [[SUB1]] ], [ [[TMP13]], [[SUB2]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
; CHECK: entry:
-; CHECK-NEXT: ret i32 [[TMP14]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
;
entry:
- %call = tail call i32 @strcmp(ptr nonnull dereferenceable(3) @.str, ptr nonnull dereferenceable(1) %s)
- ret i32 %call
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(3) @s3, ptr nonnull dereferenceable(1) %s)
+ %cmp = icmp sle i32 %call, 0
+ ret i1 %cmp
}
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
index 15d5e17a86cf03..ebe9c32ca22fab 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
@@ -1,165 +1,163 @@
; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=2 < %s | FileCheck --check-prefix TH-2 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=1 < %s | FileCheck --check-prefix TH-1 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=0 < %s | FileCheck --check-prefix TH-0 %s
-declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
declare i32 @strcmp(ptr nocapture, ptr nocapture)
+declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
+
+ at s1 = constant [1 x i8] c"\00", align 1
+ at s2n = constant [2 x i8] c"aa", align 1
+ at s3 = constant [3 x i8] c"aa\00", align 1
+ at s4 = constant [4 x i8] c"aab\00", align 1
- at .str = private unnamed_addr constant [3 x i8] c"aa\00", align 1
- at .str.1 = private unnamed_addr constant [4 x i8] c"aab\00", align 1
- at __const.test_strncmp_8.s2 = private unnamed_addr constant [2 x i8] c"aa", align 1
+; strncmp(s, "aa", 1)
+define i1 @test_strncmp_0(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 1)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+}
+; CHECK-LABEL: @test_strncmp_0(
+; CHECK: @strncmp
-; int test_strncmp_1(const char *s) {
-; if (!strncmp(s, "aa", 2))
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_1(i8* nocapture readonly %s) {
+; strncmp(s, "aa", 2)
+define i1 @test_strncmp_1(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str, i64 2)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_1(
; CHECK-NOT: @strncmp
-define i32 @test_strncmp_1_dereferenceable(i8* nocapture readonly dereferenceable(2) %s) {
+; TH-2-LABEL: @test_strncmp_1(
+; TH-2-NOT: @strncmp
+; TH-1-LABEL: @test_strncmp_1(
+; TH-1: @strncmp
+; TH-0-LABEL: @test_strncmp_1(
+; TH-0: @strncmp
+
+define i1 @test_strncmp_1_dereferenceable(i8* nocapture readonly dereferenceable(2) %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @.str, i64 2)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_1_dereferenceable(
; CHECK: @strncmp
-; int test_strncmp_2(const char *s) {
-; if (!strncmp(s, "aa", 3))
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_2(i8* nocapture readonly %s) {
+; TH-2-LABEL: @test_strncmp_1_dereferenceable(
+; TH-1-LABEL: @test_strncmp_1_dereferenceable(
+; TH-0-LABEL: @test_strncmp_1_dereferenceable(
+
+define i32 @test_strncmp_1_not_comparision(i8* nocapture readonly %s) {
+entry:
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+ ret i32 %call
+}
+; CHECK-LABEL: @test_strncmp_1_not_comparision(
+; CHECK: @strncmp
+
+; strncmp(s, "aa", 3)
+define i1 @test_strncmp_2(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str, i64 3)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 3)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_2(
; CHECK-NOT: @strncmp
-; int test_strncmp_3(const char *s) {
-; if (!strncmp(s, "aab", 3))
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_3(i8* nocapture readonly %s) {
+; TH-2-LABEL: @test_strncmp_2(
+; TH-2: @strncmp
+; TH-1-LABEL: @test_strncmp_2(
+; TH-1: @strncmp
+; TH-0-LABEL: @test_strncmp_2(
+; TH-0: @strncmp
+
+; strncmp(s, "aab", 3)
+define i1 @test_strncmp_3(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @.str.1, i64 3)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 3)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_3(
; CHECK-NOT: @strncmp
-; int test_strncmp_4(const char *s) {
-; if (!strncmp(s, "aab", 4))
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_4(i8* nocapture readonly %s) {
+; TH-2-LABEL: @test_strncmp_3(
+; TH-1-LABEL: @test_strncmp_3(
+; TH-0-LABEL: @test_strncmp_3(
+
+; strncmp(s, "aab", 4)
+define i1 @test_strncmp_4(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @.str.1, i64 4)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 4)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_4(
; CHECK: @strncmp
-; int test_strncmp_5(const char *s) {
-; if (strncmp(s, "aa", 2) < 0)
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_5(i8* nocapture readonly %s) {
+; strncmp(s, "aa", 2)
+define i1 @test_strncmp_5(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str, i64 2)
- %cmp = icmp slt i32 %call, 0
- %retval.0 = select i1 %cmp, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_5(
; CHECK-NOT: @strncmp
-; int test_strncmp_6(const char *s1) {
-; char s2[] = {'a', 'a'};
-; if (strncmp(s1, s2, 2) < 0)
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_6(i8* nocapture readonly %s1) {
+; char s2[] = {'a', 'a'}
+; strncmp(s1, s2, 2)
+define i1 @test_strncmp_6(i8* nocapture readonly %s1) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @__const.test_strncmp_8.s2, i64 2)
- %cmp = icmp slt i32 %call, 0
- %retval.0 = select i1 %cmp, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @s2n, i64 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_6(
; CHECK-NOT: @strncmp
-; int test_strncmp_7(const char *s1) {
-; char s2[] = {'a', 'a'};
-; if (strncmp(s1, s2, 3) < 0)
-; return 11;
-; return 41;
-; }
-define i32 @test_strncmp_7(i8* nocapture readonly %s1) {
+; char s2[] = {'a', 'a'}
+; strncmp(s1, s2, 3)
+define i1 @test_strncmp_7(i8* nocapture readonly %s1) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @__const.test_strncmp_8.s2, i64 3)
- %cmp = icmp slt i32 %call, 0
- %retval.0 = select i1 %cmp, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @s2n, i64 3)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_7(
; CHECK: @strncmp
-; int test_strcmp_1(const char *s) {
-; if (!strcmp(s, "aa"))
-; return 11;
-; return 41;
-; }
-define i32 @test_strcmp_1(i8* nocapture readonly %s) {
+; strcmp(s, "")
+define i1 @test_strcmp_0(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @.str)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(1) @s1)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @test_strcmp_1(
-; CHECK-NOT: @strcmp
+; CHECK-LABEL: @test_strcmp_0(
+; CHECK: @strcmp
-define i32 @test_strcmp_1_dereferenceable(i8* nocapture readonly dereferenceable(2) %s) {
+; strcmp(s, "aa")
+define i1 @test_strcmp_1(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strcmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @.str)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @test_strcmp_1_dereferenceable(
-; CHECK: @strcmp
+; CHECK-LABEL: @test_strcmp_1(
+; CHECK-NOT: @strcmp
-; int test_strcmp_2(const char *s) {
-; if (!strcmp(s, "aab"))
-; return 11;
-; return 41;
-; }
-define i32 @test_strcmp_2(i8* nocapture readonly %s) {
+; strcmp(s, "aab")
+define i1 @test_strcmp_2(i8* nocapture readonly %s) {
entry:
- %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @.str.1)
- %tobool.not = icmp eq i32 %call, 0
- %retval.0 = select i1 %tobool.not, i32 11, i32 41
- ret i32 %retval.0
+ %call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
}
; CHECK-LABEL: @test_strcmp_2(
; CHECK: @strcmp
>From 4ca7ba9c356d97b80ce3154eeef91e64a987e971 Mon Sep 17 00:00:00 2001
From: zhangfenglei <zhangfenglei at huawei.com>
Date: Wed, 1 May 2024 16:51:38 +0800
Subject: [PATCH 4/6] [AggressiveInstCombine] Inline strcmp/strncmp
* Use SplitBlock rather than splitBlockBefore, so that the original
block keeps its name and the call is moved into a new ".tail" block;
this avoids invalidating the Instruction iterator.
---
.../AggressiveInstCombine.cpp | 33 +++++------
.../AggressiveInstCombine/strncmp-1.ll | 56 +++++++++----------
2 files changed, 45 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 3349607a299ed5..fc354b08bc1e50 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -1038,13 +1038,13 @@ bool StrNCmpInliner::optimizeStrNCmp() {
/// BBCI
///
/// (after)
-/// BBBefore -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBCI
-/// | ^
-/// E |
-/// | |
-/// BBSubs[1] (sub,icmp) --NE-----+
-/// ... |
-/// BBSubs[N-1] (sub) ---------+
+/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
+/// | ^
+/// E |
+/// | |
+/// BBSubs[1] (sub,icmp) --NE-----+
+/// ... |
+/// BBSubs[N-1] (sub) ---------+
///
bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
bool Swapped) {
@@ -1052,19 +1052,20 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
IRBuilder<> B(Ctx);
BasicBlock *BBCI = CI->getParent();
- BasicBlock *BBBefore = splitBlockBefore(BBCI, CI, DTU, nullptr, nullptr,
- BBCI->getName() + ".before");
+ BasicBlock *BBTail =
+ SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
SmallVector<BasicBlock *> BBSubs;
for (uint64_t i = 0; i < N; ++i)
- BBSubs.push_back(BasicBlock::Create(Ctx, "sub", BBCI->getParent(), BBCI));
- BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBCI);
+ BBSubs.push_back(BasicBlock::Create(Ctx, "sub_" + std::to_string(i),
+ BBCI->getParent(), BBTail));
+ BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
- cast<BranchInst>(BBBefore->getTerminator())->setSuccessor(0, BBSubs[0]);
+ cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
B.SetInsertPoint(BBNE);
PHINode *Phi = B.CreatePHI(CI->getType(), N);
- B.CreateBr(BBCI);
+ B.CreateBr(BBTail);
Value *Base = LHS;
for (uint64_t i = 0; i < N; ++i) {
@@ -1089,14 +1090,14 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
if (DTU) {
SmallVector<DominatorTree::UpdateType, 8> Updates;
- Updates.push_back({DominatorTree::Insert, BBBefore, BBSubs[0]});
+ Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
for (uint64_t i = 0; i < N; ++i) {
if (i < N - 1)
Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
}
- Updates.push_back({DominatorTree::Insert, BBNE, BBCI});
- Updates.push_back({DominatorTree::Delete, BBBefore, BBCI});
+ Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
+ Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
DTU->applyUpdates(Updates);
}
return true;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
index 3d511483a7beb1..569de8e5efee3f 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
@@ -12,15 +12,15 @@ declare i32 @strcmp(ptr nocapture, ptr nocapture)
define i1 @test_strncmp_1(ptr nocapture readonly %s) {
; CHECK-LABEL: define i1 @test_strncmp_1(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
-; CHECK-NEXT: entry.before:
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
-; CHECK: sub:
+; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
-; CHECK: sub1:
+; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
@@ -29,7 +29,7 @@ define i1 @test_strncmp_1(ptr nocapture readonly %s) {
; CHECK: ne:
; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
-; CHECK: entry:
+; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -42,22 +42,22 @@ entry:
define i1 @test_strncmp_2(ptr nocapture readonly %s) {
; CHECK-LABEL: define i1 @test_strncmp_2(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
-; CHECK-NEXT: entry.before:
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
-; CHECK: sub:
+; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
-; CHECK: sub1:
+; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
-; CHECK: sub2:
+; CHECK: sub_2:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
@@ -66,7 +66,7 @@ define i1 @test_strncmp_2(ptr nocapture readonly %s) {
; CHECK: ne:
; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
-; CHECK: entry:
+; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP13]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -79,22 +79,22 @@ entry:
define i1 @test_strncmp_3(ptr nocapture readonly %s) {
; CHECK-LABEL: define i1 @test_strncmp_3(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
-; CHECK-NEXT: entry.before:
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
-; CHECK: sub:
+; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
-; CHECK: sub1:
+; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
-; CHECK: sub2:
+; CHECK: sub_2:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
@@ -103,7 +103,7 @@ define i1 @test_strncmp_3(ptr nocapture readonly %s) {
; CHECK: ne:
; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
-; CHECK: entry:
+; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -116,15 +116,15 @@ entry:
define i1 @test_strcmp_1(ptr nocapture readonly %s) {
; CHECK-LABEL: define i1 @test_strcmp_1(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
-; CHECK-NEXT: entry.before:
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
-; CHECK: sub:
+; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 97
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
-; CHECK: sub1:
+; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
@@ -132,7 +132,7 @@ define i1 @test_strcmp_1(ptr nocapture readonly %s) {
; CHECK: ne:
; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP6]], [[SUB1]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
-; CHECK: entry:
+; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP7]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -145,22 +145,22 @@ entry:
define i1 @test_strcmp_2(ptr nocapture readonly %s) {
; CHECK-LABEL: define i1 @test_strcmp_2(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
-; CHECK-NEXT: entry.before:
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
-; CHECK: sub:
+; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 97
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
-; CHECK: sub1:
+; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 98
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
-; CHECK: sub2:
+; CHECK: sub_2:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
@@ -168,7 +168,7 @@ define i1 @test_strcmp_2(ptr nocapture readonly %s) {
; CHECK: ne:
; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP11]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
-; CHECK: entry:
+; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[TMP12]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
@@ -181,22 +181,22 @@ entry:
define i1 @test_strcmp_3(ptr nocapture readonly %s) {
; CHECK-LABEL: define i1 @test_strcmp_3(
; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
-; CHECK-NEXT: entry.before:
+; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
-; CHECK: sub:
+; CHECK: sub_0:
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 97, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: br i1 [[TMP3]], label [[NE:%.*]], label [[SUB1:%.*]]
-; CHECK: sub1:
+; CHECK: sub_1:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[TMP7:%.*]] = sub i32 98, [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0
; CHECK-NEXT: br i1 [[TMP8]], label [[NE]], label [[SUB2:%.*]]
-; CHECK: sub2:
+; CHECK: sub_2:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 2
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
@@ -205,7 +205,7 @@ define i1 @test_strcmp_3(ptr nocapture readonly %s) {
; CHECK: ne:
; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP2]], [[SUB]] ], [ [[TMP7]], [[SUB1]] ], [ [[TMP12]], [[SUB2]] ]
; CHECK-NEXT: br label [[ENTRY:%.*]]
-; CHECK: entry:
+; CHECK: entry.tail:
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], 0
; CHECK-NEXT: ret i1 [[CMP]]
;
>From 8e63645536df0c0ae1e39c20cdcaf69c5488fb77 Mon Sep 17 00:00:00 2001
From: zhangfenglei <zhangfenglei at huawei.com>
Date: Thu, 2 May 2024 18:12:41 +0800
Subject: [PATCH 5/6] [AggressiveInstCombine] Inline strcmp/strncmp
* improve tests
---
.../AggressiveInstCombine.cpp | 36 +++++----
.../AggressiveInstCombine/strncmp-1.ll | 26 +++----
.../AggressiveInstCombine/strncmp-2.ll | 76 ++++++++-----------
3 files changed, 60 insertions(+), 78 deletions(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index fc354b08bc1e50..39eca4f41ec57f 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -49,6 +49,11 @@ static cl::opt<unsigned> MaxInstrsToScan(
"aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
cl::desc("Max number of instructions to scan for aggressive instcombine."));
+static cl::opt<unsigned> StrNCmpInlineThreshold(
+ "strncmp-inline-threshold", cl::init(3), cl::Hidden,
+ cl::desc("The maximum length of a constant string for a builtin string cmp "
+ "call eligible for inlining. The default value is 3."));
+
/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
@@ -914,11 +919,6 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
return true;
}
-static cl::opt<unsigned> StrNCmpInlineThreshold(
- "strncmp-inline-threshold", cl::init(3), cl::Hidden,
- cl::desc("The maximum length of a constant string for a builtin string cmp "
- "call eligible for inlining. The default value is 3."));
-
namespace {
class StrNCmpInliner {
public:
@@ -929,7 +929,7 @@ class StrNCmpInliner {
bool optimizeStrNCmp();
private:
- bool inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
+ void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
CallInst *CI;
LibFunc Func;
@@ -981,18 +981,19 @@ bool StrNCmpInliner::optimizeStrNCmp() {
return false;
StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1, false);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2, false);
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
if (HasStr1 == HasStr2)
return false;
// Note that '\0' and characters after it are not trimmed.
StringRef Str = HasStr1 ? Str1 : Str2;
+ Value *StrP = HasStr1 ? Str2P : Str1P;
size_t Idx = Str.find('\0');
uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
if (Func == LibFunc_strncmp) {
- if (auto ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+ if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
N = std::min(N, ConstInt->getZExtValue());
else
return false;
@@ -1001,15 +1002,13 @@ bool StrNCmpInliner::optimizeStrNCmp() {
if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
return false;
- Value *StrP = HasStr1 ? Str2P : Str1P;
-
// Cases where StrP has two or more dereferenceable bytes might be better
// optimized elsewhere.
bool CanBeNull = false, CanBeFreed = false;
if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
return false;
-
- return inlineCompare(StrP, Str, N, HasStr1);
+ inlineCompare(StrP, Str, N, HasStr1);
+ return true;
}
/// Convert
@@ -1046,7 +1045,7 @@ bool StrNCmpInliner::optimizeStrNCmp() {
/// ... |
/// BBSubs[N-1] (sub) ---------+
///
-bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
+void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
bool Swapped) {
auto &Ctx = CI->getContext();
IRBuilder<> B(Ctx);
@@ -1056,9 +1055,9 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
SmallVector<BasicBlock *> BBSubs;
- for (uint64_t i = 0; i < N; ++i)
- BBSubs.push_back(BasicBlock::Create(Ctx, "sub_" + std::to_string(i),
- BBCI->getParent(), BBTail));
+ for (uint64_t I = 0; I < N; ++I)
+ BBSubs.push_back(
+ BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
@@ -1100,11 +1099,10 @@ bool StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
DTU->applyUpdates(Updates);
}
- return true;
}
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, llvm::AssumptionCache &AC,
+ TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree &DT, const DataLayout &DL,
bool &MadeCFGChange) {
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
index 569de8e5efee3f..f3f88663672fe2 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-1.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
+; RUN: opt -S -passes=aggressive-instcombine < %s | FileCheck %s
; check whether we generate the right IR
@@ -9,9 +9,9 @@ declare i32 @strcmp(ptr nocapture, ptr nocapture)
@s2 = constant [2 x i8] c"a\00"
@s3 = constant [3 x i8] c"ab\00"
-define i1 @test_strncmp_1(ptr nocapture readonly %s) {
+define i1 @test_strncmp_1(ptr %s) {
; CHECK-LABEL: define i1 @test_strncmp_1(
-; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
+; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub_0:
@@ -39,9 +39,9 @@ entry:
ret i1 %cmp
}
-define i1 @test_strncmp_2(ptr nocapture readonly %s) {
+define i1 @test_strncmp_2(ptr %s) {
; CHECK-LABEL: define i1 @test_strncmp_2(
-; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
+; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub_0:
@@ -76,9 +76,9 @@ entry:
ret i1 %cmp
}
-define i1 @test_strncmp_3(ptr nocapture readonly %s) {
+define i1 @test_strncmp_3(ptr %s) {
; CHECK-LABEL: define i1 @test_strncmp_3(
-; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
+; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub_0:
@@ -113,9 +113,9 @@ entry:
ret i1 %cmp
}
-define i1 @test_strcmp_1(ptr nocapture readonly %s) {
+define i1 @test_strcmp_1(ptr %s) {
; CHECK-LABEL: define i1 @test_strcmp_1(
-; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
+; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub_0:
@@ -142,9 +142,9 @@ entry:
ret i1 %cmp
}
-define i1 @test_strcmp_2(ptr nocapture readonly %s) {
+define i1 @test_strcmp_2(ptr %s) {
; CHECK-LABEL: define i1 @test_strcmp_2(
-; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
+; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub_0:
@@ -178,9 +178,9 @@ entry:
ret i1 %cmp
}
-define i1 @test_strcmp_3(ptr nocapture readonly %s) {
+define i1 @test_strcmp_3(ptr %s) {
; CHECK-LABEL: define i1 @test_strcmp_3(
-; CHECK-SAME: ptr nocapture readonly [[S:%.*]]) {
+; CHECK-SAME: ptr [[S:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[SUB:%.*]]
; CHECK: sub_0:
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
index ebe9c32ca22fab..13ad66642eae49 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
@@ -1,7 +1,7 @@
-; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck %s
-; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=2 < %s | FileCheck --check-prefix TH-2 %s
-; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=1 < %s | FileCheck --check-prefix TH-1 %s
-; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=0 < %s | FileCheck --check-prefix TH-0 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=3 < %s | FileCheck --check-prefixes=CHECK,TH-3 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=2 < %s | FileCheck --check-prefixes=CHECK,TH-2 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=1 < %s | FileCheck --check-prefixes=CHECK,TH-1 %s
+; RUN: opt -S -passes=aggressive-instcombine -strncmp-inline-threshold=0 < %s | FileCheck --check-prefixes=CHECK,TH-0 %s
declare i32 @strcmp(ptr nocapture, ptr nocapture)
declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
@@ -12,152 +12,136 @@ declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
@s4 = constant [4 x i8] c"aab\00", align 1
; strncmp(s, "aa", 1)
-define i1 @test_strncmp_0(i8* nocapture readonly %s) {
+define i1 @test_strncmp_0(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 1)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_0(
-; CHECK: @strncmp
+; TH-3: @strncmp
; strncmp(s, "aa", 2)
-define i1 @test_strncmp_1(i8* nocapture readonly %s) {
+define i1 @test_strncmp_1(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_1(
-; CHECK-NOT: @strncmp
-
-; TH-2-LABEL: @test_strncmp_1(
+; TH-3-NOT: @strncmp
; TH-2-NOT: @strncmp
-; TH-1-LABEL: @test_strncmp_1(
; TH-1: @strncmp
-; TH-0-LABEL: @test_strncmp_1(
; TH-0: @strncmp
-define i1 @test_strncmp_1_dereferenceable(i8* nocapture readonly dereferenceable(2) %s) {
+define i1 @test_strncmp_1_dereferenceable(i8* dereferenceable(2) %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @s3, i64 2)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_1_dereferenceable(
-; CHECK: @strncmp
-
-; TH-2-LABEL: @test_strncmp_1_dereferenceable(
-; TH-1-LABEL: @test_strncmp_1_dereferenceable(
-; TH-0-LABEL: @test_strncmp_1_dereferenceable(
+; TH-3: @strncmp
-define i32 @test_strncmp_1_not_comparision(i8* nocapture readonly %s) {
+define i32 @test_strncmp_1_not_comparision(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
ret i32 %call
}
; CHECK-LABEL: @test_strncmp_1_not_comparision(
-; CHECK: @strncmp
+; TH-3: @strncmp
; strncmp(s, "aa", 3)
-define i1 @test_strncmp_2(i8* nocapture readonly %s) {
+define i1 @test_strncmp_2(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 3)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_2(
-; CHECK-NOT: @strncmp
-
-; TH-2-LABEL: @test_strncmp_2(
+; TH-3-NOT: @strncmp
; TH-2: @strncmp
-; TH-1-LABEL: @test_strncmp_2(
; TH-1: @strncmp
-; TH-0-LABEL: @test_strncmp_2(
; TH-0: @strncmp
; strncmp(s, "aab", 3)
-define i1 @test_strncmp_3(i8* nocapture readonly %s) {
+define i1 @test_strncmp_3(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 3)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_3(
-; CHECK-NOT: @strncmp
-
-; TH-2-LABEL: @test_strncmp_3(
-; TH-1-LABEL: @test_strncmp_3(
-; TH-0-LABEL: @test_strncmp_3(
+; TH-3-NOT: @strncmp
; strncmp(s, "aab", 4)
-define i1 @test_strncmp_4(i8* nocapture readonly %s) {
+define i1 @test_strncmp_4(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 4)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_4(
-; CHECK: @strncmp
+; TH-3: @strncmp
; strncmp(s, "aa", 2)
-define i1 @test_strncmp_5(i8* nocapture readonly %s) {
+define i1 @test_strncmp_5(i8* %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_5(
-; CHECK-NOT: @strncmp
+; TH-3-NOT: @strncmp
; char s2[] = {'a', 'a'}
; strncmp(s1, s2, 2)
-define i1 @test_strncmp_6(i8* nocapture readonly %s1) {
+define i1 @test_strncmp_6(i8* %s1) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @s2n, i64 2)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_6(
-; CHECK-NOT: @strncmp
+; TH-3-NOT: @strncmp
; char s2[] = {'a', 'a'}
; strncmp(s1, s2, 3)
-define i1 @test_strncmp_7(i8* nocapture readonly %s1) {
+define i1 @test_strncmp_7(i8* %s1) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @s2n, i64 3)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_7(
-; CHECK: @strncmp
+; TH-3: @strncmp
; strcmp(s, "")
-define i1 @test_strcmp_0(i8* nocapture readonly %s) {
+define i1 @test_strcmp_0(i8* %s) {
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(1) @s1)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strcmp_0(
-; CHECK: @strcmp
+; TH-3: @strcmp
; strcmp(s, "aa")
-define i1 @test_strcmp_1(i8* nocapture readonly %s) {
+define i1 @test_strcmp_1(i8* %s) {
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strcmp_1(
-; CHECK-NOT: @strcmp
+; TH-3-NOT: @strcmp
; strcmp(s, "aab")
-define i1 @test_strcmp_2(i8* nocapture readonly %s) {
+define i1 @test_strcmp_2(i8* %s) {
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strcmp_2(
-; CHECK: @strcmp
+; TH-3: @strcmp
>From e3b20bfc59d86fd9c2527bd8a9caf1386c54d05d Mon Sep 17 00:00:00 2001
From: zhangfenglei <zhangfenglei at huawei.com>
Date: Fri, 3 May 2024 11:05:07 +0800
Subject: [PATCH 6/6] [AggressiveInstCombine] Inline strcmp/strncmp
* improve tests
---
.../AggressiveInstCombine/strncmp-2.ll | 42 +++++++++----------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
index 13ad66642eae49..0cc5e3f135b652 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/strncmp-2.ll
@@ -12,17 +12,17 @@ declare i32 @strncmp(ptr nocapture, ptr nocapture, i64)
@s4 = constant [4 x i8] c"aab\00", align 1
; strncmp(s, "aa", 1)
-define i1 @test_strncmp_0(i8* %s) {
+define i1 @test_strncmp_0(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 1)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_0(
-; TH-3: @strncmp
+; CHECK: @strncmp
; strncmp(s, "aa", 2)
-define i1 @test_strncmp_1(i8* %s) {
+define i1 @test_strncmp_1(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
%cmp = icmp eq i32 %call, 0
@@ -34,25 +34,25 @@ entry:
; TH-1: @strncmp
; TH-0: @strncmp
-define i1 @test_strncmp_1_dereferenceable(i8* dereferenceable(2) %s) {
+define i1 @test_strncmp_1_dereferenceable(ptr dereferenceable(2) %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull %s, ptr nonnull dereferenceable(3) @s3, i64 2)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_1_dereferenceable(
-; TH-3: @strncmp
+; CHECK: @strncmp
-define i32 @test_strncmp_1_not_comparision(i8* %s) {
+define i32 @test_strncmp_1_not_comparision(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
ret i32 %call
}
; CHECK-LABEL: @test_strncmp_1_not_comparision(
-; TH-3: @strncmp
+; CHECK: @strncmp
; strncmp(s, "aa", 3)
-define i1 @test_strncmp_2(i8* %s) {
+define i1 @test_strncmp_2(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 3)
%cmp = icmp eq i32 %call, 0
@@ -65,7 +65,7 @@ entry:
; TH-0: @strncmp
; strncmp(s, "aab", 3)
-define i1 @test_strncmp_3(i8* %s) {
+define i1 @test_strncmp_3(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 3)
%cmp = icmp eq i32 %call, 0
@@ -75,7 +75,7 @@ entry:
; TH-3-NOT: @strncmp
; strncmp(s, "aab", 4)
-define i1 @test_strncmp_4(i8* %s) {
+define i1 @test_strncmp_4(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4, i64 4)
%cmp = icmp eq i32 %call, 0
@@ -85,7 +85,7 @@ entry:
; TH-3: @strncmp
; strncmp(s, "aa", 2)
-define i1 @test_strncmp_5(i8* %s) {
+define i1 @test_strncmp_5(ptr %s) {
entry:
%call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3, i64 2)
%cmp = icmp eq i32 %call, 0
@@ -96,9 +96,9 @@ entry:
; char s2[] = {'a', 'a'}
; strncmp(s1, s2, 2)
-define i1 @test_strncmp_6(i8* %s1) {
+define i1 @test_strncmp_6(ptr %s1) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @s2n, i64 2)
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(2) @s2n, i64 2)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
@@ -106,28 +106,28 @@ entry:
; TH-3-NOT: @strncmp
; char s2[] = {'a', 'a'}
-; strncmp(s1, s2, 3)
-define i1 @test_strncmp_7(i8* %s1) {
+; strncmp(s, s2, 3)
+define i1 @test_strncmp_7(ptr %s) {
entry:
- %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s1, ptr nonnull dereferenceable(3) @s2n, i64 3)
+ %call = tail call i32 @strncmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(2) @s2n, i64 3)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strncmp_7(
-; TH-3: @strncmp
+; CHECK: @strncmp
; strcmp(s, "")
-define i1 @test_strcmp_0(i8* %s) {
+define i1 @test_strcmp_0(ptr %s) {
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(1) @s1)
%cmp = icmp eq i32 %call, 0
ret i1 %cmp
}
; CHECK-LABEL: @test_strcmp_0(
-; TH-3: @strcmp
+; CHECK: @strcmp
; strcmp(s, "aa")
-define i1 @test_strcmp_1(i8* %s) {
+define i1 @test_strcmp_1(ptr %s) {
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(3) @s3)
%cmp = icmp eq i32 %call, 0
@@ -137,7 +137,7 @@ entry:
; TH-3-NOT: @strcmp
; strcmp(s, "aab")
-define i1 @test_strcmp_2(i8* %s) {
+define i1 @test_strcmp_2(ptr %s) {
entry:
%call = tail call i32 @strcmp(ptr nonnull dereferenceable(1) %s, ptr nonnull dereferenceable(4) @s4)
%cmp = icmp eq i32 %call, 0
More information about the llvm-commits
mailing list