[llvm] [Transforms] LoopIdiomRecognize recognize strlen and wcslen (PR #108985)
Henry Jiang via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 19 10:40:59 PST 2024
https://github.com/mustartt updated https://github.com/llvm/llvm-project/pull/108985
>From c37e74fa74e02c6693c93abdefcb225cd9d03e9d Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Sat, 7 Sep 2024 22:27:50 -0400
Subject: [PATCH 1/5] Initial upstreaming of strlen8 LIR 1 out of 3
---
.../Transforms/Scalar/LoopIdiomRecognize.h | 3 +
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 296 +++++++++++++++++-
llvm/test/Transforms/LoopIdiom/strlen.ll | 149 +++++++++
3 files changed, 445 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/strlen.ll
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 0c6406d8618518..3a9f016ce9bd60 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -34,6 +34,9 @@ struct DisableLIRP {
/// When true, Memcpy is disabled.
static bool Memcpy;
+
+ /// When true, Strlen is disabled.
+ static bool Strlen;
};
/// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 05cf638d3f09df..1bcf7025cc1259 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -97,6 +97,7 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
+STATISTIC(NumStrLen, "Number of strlen's formed from loop loads");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -126,6 +127,14 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);
+bool DisableLIRP::Strlen;
+static cl::opt<bool, true>
+ DisableLIRPStrlen("disable-" DEBUG_TYPE "-strlen",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to strlen."),
+ cl::location(DisableLIRP::Strlen), cl::init(false),
+ cl::ReallyHidden);
+
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling"
@@ -246,6 +255,7 @@ class LoopIdiomRecognize {
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
+ bool recognizeAndInsertStrLen();
/// @}
};
@@ -1507,9 +1517,11 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
if (!Cond)
return nullptr;
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
- if (!CmpZero || !CmpZero->isZero())
- return nullptr;
+ if (!isa<ConstantPointerNull>(Cond->getOperand(1))) {
+ ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+ if (!CmpZero || !CmpZero->isZero())
+ return nullptr;
+ }
BasicBlock *TrueSucc = BI->getSuccessor(0);
BasicBlock *FalseSucc = BI->getSuccessor(1);
@@ -1524,6 +1536,284 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
+/// getCandidateResInstr - If there is strlen calculated, return the Result
+/// instruction based on the \p OpWidth passed, else return nullptr
+static Instruction *getCandidateResInstr(Instruction *EndAddress,
+ Value *StartAddress,
+ unsigned OpWidth) {
+ using namespace llvm::PatternMatch;
+
+ assert(StartAddress && "Valid start address required.");
+
+ // lambda expression to check that the instruction has a single user
+ auto GetSingleUser = [](Instruction *I) -> User * {
+ if (I->hasOneUse())
+ return *I->user_begin();
+ return nullptr;
+ };
+
+ // The pointer to the end address should only have one use which is a pointer
+ // to int instruction.
+ auto *TmpUser = GetSingleUser(EndAddress);
+ if (!TmpUser)
+ return nullptr;
+
+ if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(TmpUser)) {
+ // The only user of the PtrToIntInst should be the sub instruction that
+ // calculates the difference b/w the two pointer operands.
+ TmpUser = GetSingleUser(PToI);
+ if (!TmpUser)
+ return nullptr;
+ Instruction *Inst = dyn_cast<Instruction>(TmpUser);
+
+ if (!Inst || Inst->getOpcode() != Instruction::Sub ||
+ Inst->getOperand(0) != PToI)
+ return nullptr;
+ Value *MatchAddr;
+ if (match(Inst->getOperand(1), m_PtrToInt(m_Value(MatchAddr)))) {
+ if (MatchAddr != StartAddress)
+ return nullptr;
+
+ // We found the candidate sub instruction
+ switch (OpWidth) {
+ case 8:
+ return Inst;
+ default:
+ return nullptr;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/// Recognizes a strlen idiom by checking for loops that increment
+/// a char pointer and then subtract with the base pointer.
+///
+/// If detected, transforms the relevant code to a strlen function
+/// call, and returns true; otherwise, returns false.
+///
+/// The core idiom we are trying to detect is:
+/// \code
+/// if (str == NULL)
+/// goto loop-exit // the precondition of the loop
+/// start = str;
+/// do {
+/// str++;
+/// } while(*str!='\0');
+/// return (str - start);
+/// loop-exit:
+/// \endcode
+///
+/// The transformed output is similar to below c-code:
+/// \code
+/// if (str == NULL)
+/// goto loop-exit // the precondition of the loop
+/// return strlen(str);
+/// \endcode
+bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
+ if (DisableLIRPStrlen)
+ return false;
+
+ // Give up if the loop has multiple blocks or multiple backedges.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
+ return false;
+
+ // It should have a preheader containing nothing but an unconditional branch.
+ auto *Pre = CurLoop->getLoopPreheader();
+ if (!Pre || &Pre->front() != Pre->getTerminator())
+ return false;
+
+ auto *EntryBI = dyn_cast<BranchInst>(Pre->getTerminator());
+ if (!EntryBI || EntryBI->isConditional())
+ return false;
+
+ // It should have a precondition block
+ auto *PreCondBB = Pre->getSinglePredecessor();
+ if (!PreCondBB)
+ return false;
+
+ // The precondition terminator instruction should skip the loop body based on
+ // an icmp with zero/null.
+ if (!matchCondition(dyn_cast<BranchInst>(PreCondBB->getTerminator()), Pre))
+ return false;
+
+ // The loop exit must be conditioned on an icmp with 0.
+ // The icmp operand has to be a load on some SSA reg that increments
+ // by 1 in the loop.
+ auto *LoopBody = *(CurLoop->block_begin());
+ auto *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
+ auto *LoopCond = matchCondition(LoopTerm, LoopBody);
+
+ if (!LoopCond)
+ return false;
+
+ auto *LoopLoad = dyn_cast<LoadInst>(LoopCond);
+ if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
+ return false;
+
+ Type *OperandType = LoopLoad->getType();
+ if (!OperandType || !OperandType->isIntegerTy())
+ return false;
+
+ // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on
+ // the loop, indicating strlen calculation.
+ auto *IncPtr = LoopLoad->getPointerOperand();
+ const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
+ if (!Step)
+ return false;
+
+ unsigned int ConstIntValue = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Step->getValue()))
+ ConstIntValue = CI->getZExtValue();
+
+ unsigned OpWidth = OperandType->getIntegerBitWidth();
+ if (OpWidth != ConstIntValue * 8)
+ return false;
+ if (OpWidth != 8)
+ return false;
+
+ // Scan every instruction in the loop to ensure there are no side effects.
+ for (auto &I : *LoopBody)
+ if (I.mayHaveSideEffects())
+ return false;
+
+ auto *LoopExitBB = CurLoop->getExitBlock();
+ if (!LoopExitBB)
+ return false;
+
+ // Check that the loop exit block is valid:
+ // It needs to have exactly one LCSSA Phi which is an AddRec.
+ PHINode *LCSSAPhi = nullptr;
+ for (PHINode &PN : LoopExitBB->phis()) {
+ if (!LCSSAPhi && PN.getNumIncomingValues() == 1)
+ LCSSAPhi = &PN;
+ else
+ return false;
+ }
+
+ if (!LCSSAPhi || !SE->isSCEVable(LCSSAPhi->getType()))
+ return false;
+
+ if (LCSSAPhi->getIncomingValueForBlock(LoopBody) !=
+ LoopLoad->getPointerOperand())
+ return false;
+
+ const SCEVAddRecExpr *LCSSAEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LCSSAPhi->getIncomingValue(0)));
+
+ if (!LCSSAEv || !dyn_cast<SCEVUnknown>(SE->getPointerBase(LCSSAEv)) ||
+ !LCSSAEv->isAffine())
+ return false;
+
+ // We can now expand the base of the str
+ IRBuilder<> Builder(Pre->getTerminator());
+
+ PHINode *LoopPhi = &*LoopBody->phis().begin();
+ if (!LoopPhi || ++LoopBody->phis().begin() != LoopBody->phis().end())
+ return false;
+ Value *PreVal = LoopBody->phis().begin()->getIncomingValueForBlock(Pre);
+ if (!PreVal)
+ return false;
+
+ Value *Expanded = nullptr;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LoopLoad->getPointerOperand())) {
+ if (GEP->getPointerOperand() != LoopPhi)
+ return false;
+ GetElementPtrInst *NewGEP =
+ GetElementPtrInst::Create(GEP->getSourceElementType(), PreVal,
+ SmallVector<Value *, 4>(GEP->indices()),
+ "newgep", Pre->getTerminator());
+ Expanded = NewGEP;
+ } else if (LoopLoad->getPointerOperand() == LoopPhi)
+ Expanded = PreVal;
+ if (!Expanded)
+ return false;
+
+ // Check that the LoopExitBB is calculating the string length and identify
+ // the instruction that has the string length calculation
+ Instruction *ResInst = getCandidateResInstr(LCSSAPhi, PreVal, OpWidth);
+ if (!ResInst)
+ return false;
+
+ // Ensure that the GEP has the correct index if the pointer was modified.
+ // This can happen when the pointer in the user code, outside the loop,
+ // walks past a certain pre-checked index of the string.
+ if (auto *GEP = dyn_cast<GEPOperator>(Expanded)) {
+ if (GEP->getNumOperands() != 2)
+ return false;
+
+ ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (!I0)
+ return false;
+
+ int64_t Index = I0->getSExtValue(); // GEP index
+ auto *SAdd = dyn_cast<SCEVAddExpr>(LoadEv->getStart());
+ if (!SAdd || SAdd->getNumOperands() != 2)
+ return false;
+
+ auto *SAdd0 = dyn_cast<SCEVConstant>(SAdd->getOperand(0));
+ if (!SAdd0)
+ return false;
+
+ ConstantInt *CInt = SAdd0->getValue(); // SCEV index
+ assert(CInt && "Expecting CInt to be valid.");
+ int64_t Offset = CInt->getSExtValue();
+
+ // Update the index based on the Offset
+ assert((Offset * 8) % GEP->getSourceElementType()->getIntegerBitWidth() ==
+ 0 &&
+ "Invalid offset");
+ int64_t NewIndex =
+ (Offset * 8) / GEP->getSourceElementType()->getIntegerBitWidth() -
+ Index;
+ Value *NewIndexVal =
+ ConstantInt::get(GEP->getOperand(1)->getType(), NewIndex);
+ GEP->setOperand(1, NewIndexVal);
+ }
+
+ Value *StrLenFunc = nullptr;
+ switch (OpWidth) {
+ case 8:
+ StrLenFunc = emitStrLen(Expanded, Builder, *DL, TLI);
+ break;
+ }
+
+ assert(StrLenFunc && "Failed to emit strlen function.");
+
+ // Replace the subtraction instruction by the result of strlen
+ ResInst->replaceAllUsesWith(StrLenFunc);
+
+ // Remove the loop-exit branch and delete dead instructions
+ RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI);
+
+ ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody
+ ? Builder.getFalse()
+ : Builder.getTrue();
+ LoopTerm->setCondition(NewLoopCond);
+
+ deleteDeadInstruction(cast<Instruction>(LoopCond));
+ deleteDeadInstruction(cast<Instruction>(IncPtr));
+ SE->forgetLoop(CurLoop);
+
+ LLVM_DEBUG(dbgs() << " Formed strlen: " << *StrLenFunc << "\n");
+
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
+ CurLoop->getStartLoc(), Pre)
+ << "Transformed pointer difference into a call to strlen() function";
+ });
+
+ ++NumStrLen;
+
+ return true;
+}
+
/// Check if the given conditional branch is based on an unsigned less-than
/// comparison between a variable and a constant, and if the comparison is false
/// the control yields to the loop entry. If the branch matches the behaviour,
diff --git a/llvm/test/Transforms/LoopIdiom/strlen.ll b/llvm/test/Transforms/LoopIdiom/strlen.ll
new file mode 100644
index 00000000000000..641fce0da8b785
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/strlen.ll
@@ -0,0 +1,149 @@
+; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define i64 @valid_strlen_i8_test1(ptr %Str) {
+; CHECK-LABEL: @valid_strlen_i8_test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
+; CHECK: lor.lhs.false:
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP]], label [[FOR_INC_PREHEADER:%.*]]
+; CHECK: for.inc.preheader:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 0
+; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]])
+; CHECK-NEXT: br label [[FOR_INC:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[SRC_09:%.*]] = phi ptr [ poison, [[FOR_INC]] ], [ [[STR]], [[FOR_INC_PREHEADER]] ]
+; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_INC]]
+; CHECK: for.end:
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[LOR_LHS_FALSE]] ]
+; CHECK-NEXT: ret i64 [[RETVAL_0]]
+;
+entry:
+ %tobool = icmp eq ptr %Str, null
+ br i1 %tobool, label %cleanup, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %0 = load i8, ptr %Str, align 1
+ %cmp = icmp eq i8 %0, 0
+ br i1 %cmp, label %cleanup, label %for.inc
+
+for.inc: ; preds = %lor.lhs.false, %for.inc
+ %Src.09 = phi ptr [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ]
+ %incdec.ptr = getelementptr inbounds i8, ptr %Src.09, i64 1
+ %.pr = load i8, ptr %incdec.ptr, align 1
+ %tobool2 = icmp eq i8 %.pr, 0
+ br i1 %tobool2, label %for.end, label %for.inc
+
+for.end: ; preds = %for.inc
+ %sub.ptr.lhs.cast = ptrtoint ptr %incdec.ptr to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ br label %cleanup
+
+cleanup: ; preds = %lor.lhs.false, %entry, %for.end
+ %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ]
+ ret i64 %retval.0
+}
+
+define i64 @valid_strlen_i8_test2(ptr %Str) {
+; CHECK-LABEL: @valid_strlen_i8_test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[FOR_COND_PREHEADER:%.*]]
+; CHECK: for.cond.preheader:
+; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]])
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1
+; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i64 [[RETVAL_0]]
+;
+entry:
+ %tobool = icmp eq ptr %Str, null
+ br i1 %tobool, label %cleanup, label %for.cond
+
+for.cond: ; preds = %entry, %for.cond
+ %Src.0 = phi ptr [ %incdec.ptr, %for.cond ], [ %Str, %entry ]
+ %0 = load i8, ptr %Src.0, align 1
+ %tobool1 = icmp eq i8 %0, 0
+ %incdec.ptr = getelementptr inbounds i8, ptr %Src.0, i64 1
+ br i1 %tobool1, label %for.end, label %for.cond
+
+for.end: ; preds = %for.cond
+ %sub.ptr.lhs.cast = ptrtoint ptr %Src.0 to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ br label %cleanup
+
+ cleanup: ; preds = %entry, %for.end
+ %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ]
+ ret i64 %retval.0
+}
+
+define void @invalid_strlen_i8_test3(ptr %s, i32 zeroext %i) {
+; CHECK-LABEL: @invalid_strlen_i8_test3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_COND:%.*]]
+; CHECK: while.cond:
+; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi ptr [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S_ADDR_0]], align 1
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[S_ADDR_0]], i64 1
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
+; CHECK: while.end:
+; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi ptr [ [[S_ADDR_0]], [[WHILE_COND]] ]
+; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_COND]] ]
+; CHECK-NEXT: store i8 45, ptr [[S_ADDR_0_LCSSA]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 10
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: store i8 65, ptr [[INCDEC_PTR1_LCSSA]], align 1
+; CHECK-NEXT: br label [[IF_END9:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: store i8 66, ptr [[INCDEC_PTR1_LCSSA]], align 1
+; CHECK-NEXT: br label [[IF_END9]]
+; CHECK: if.end9:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %s.addr.0 = phi ptr [ %s, %entry ], [ %incdec.ptr1, %while.cond ]
+ %0 = load i8, ptr %s.addr.0, align 1
+ %tobool.not = icmp eq i8 %0, 0
+ %incdec.ptr1 = getelementptr inbounds i8, ptr %s.addr.0, i64 1
+ br i1 %tobool.not, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %s.addr.0.lcssa = phi ptr [ %s.addr.0, %while.cond ]
+ %incdec.ptr1.lcssa = phi ptr [ %incdec.ptr1, %while.cond ]
+ store i8 45, ptr %s.addr.0.lcssa, align 1
+ %cmp = icmp ult i32 %i, 10
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %while.end
+ store i8 65, ptr %incdec.ptr1.lcssa, align 1
+ br label %if.end9
+
+if.end: ; preds = %while.end
+ store i8 66, ptr %incdec.ptr1.lcssa, align 1
+ br label %if.end9
+
+if.end9: ; preds = %if.end, %if.then
+ ret void
+}
+
>From 46af91d4b02e1e75d3c321f377dc7b9033820dfe Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Sun, 8 Sep 2024 13:17:03 -0400
Subject: [PATCH 2/5] enable strlen insert
---
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 1bcf7025cc1259..cbc5ed40947d79 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1499,7 +1499,7 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
- recognizeShiftUntilLessThan();
+ recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
}
/// Check if the given conditional branch is based on the comparison between
>From 65416534f79b1645df771f24ec407a1ba9919aa7 Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Thu, 12 Sep 2024 16:39:07 -0400
Subject: [PATCH 3/5] replace LCSSA with null term ptr
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 125 +++++-------------
1 file changed, 32 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index cbc5ed40947d79..20ecc2d83b2b05 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -1536,57 +1537,6 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
-/// getCandidateResInstr - If there is strlen calculated, return the Result
-/// instruction based on the \p OpWidth passed, else return nullptr
-static Instruction *getCandidateResInstr(Instruction *EndAddress,
- Value *StartAddress,
- unsigned OpWidth) {
- using namespace llvm::PatternMatch;
-
- assert(StartAddress && "Valid start address required.");
-
- // lambda expression to check that the instruction has a single user
- auto GetSingleUser = [](Instruction *I) -> User * {
- if (I->hasOneUse())
- return *I->user_begin();
- return nullptr;
- };
-
- // The pointer to the end address should only have one use which is a pointer
- // to int instruction.
- auto *TmpUser = GetSingleUser(EndAddress);
- if (!TmpUser)
- return nullptr;
-
- if (PtrToIntInst *PToI = dyn_cast<PtrToIntInst>(TmpUser)) {
- // The only user of the PtrToIntInst should be the sub instruction that
- // calculates the difference b/w the two pointer operands.
- TmpUser = GetSingleUser(PToI);
- if (!TmpUser)
- return nullptr;
- Instruction *Inst = dyn_cast<Instruction>(TmpUser);
-
- if (!Inst || Inst->getOpcode() != Instruction::Sub ||
- Inst->getOperand(0) != PToI)
- return nullptr;
- Value *MatchAddr;
- if (match(Inst->getOperand(1), m_PtrToInt(m_Value(MatchAddr)))) {
- if (MatchAddr != StartAddress)
- return nullptr;
-
- // We found the candidate sub instruction
- switch (OpWidth) {
- case 8:
- return Inst;
- default:
- return nullptr;
- }
- }
- }
-
- return nullptr;
-}
-
/// Recognizes a strlen idiom by checking for loops that increment
/// a char pointer and then subtract with the base pointer.
///
@@ -1595,22 +1545,19 @@ static Instruction *getCandidateResInstr(Instruction *EndAddress,
///
/// The core idiom we are trying to detect is:
/// \code
-/// if (str == NULL)
-/// goto loop-exit // the precondition of the loop
/// start = str;
/// do {
/// str++;
-/// } while(*str!='\0');
-/// return (str - start);
-/// loop-exit:
+/// } while(*str != '\0');
/// \endcode
///
/// The transformed output is similar to below c-code:
/// \code
-/// if (str == NULL)
-/// goto loop-exit // the precondition of the loop
-/// return strlen(str);
+/// str = start + strlen(start)
+/// len = str - start
/// \endcode
+///
+/// Later the pointer subtraction will be folded by InstCombine
bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
if (DisableLIRPStrlen)
return false;
@@ -1620,30 +1567,20 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
return false;
// It should have a preheader containing nothing but an unconditional branch.
- auto *Pre = CurLoop->getLoopPreheader();
- if (!Pre || &Pre->front() != Pre->getTerminator())
+ auto *Preheader = CurLoop->getLoopPreheader();
+ if (!Preheader || &Preheader->front() != Preheader->getTerminator())
return false;
- auto *EntryBI = dyn_cast<BranchInst>(Pre->getTerminator());
+ auto *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
if (!EntryBI || EntryBI->isConditional())
return false;
- // It should have a precondition block
- auto *PreCondBB = Pre->getSinglePredecessor();
- if (!PreCondBB)
- return false;
-
- // The precondition terminator instruction should skip the loop body based on
- // an icmp with zero/null.
- if (!matchCondition(dyn_cast<BranchInst>(PreCondBB->getTerminator()), Pre))
- return false;
-
// The loop exit must be conditioned on an icmp with 0.
// The icmp operand has to be a load on some SSA reg that increments
// by 1 in the loop.
- auto *LoopBody = *(CurLoop->block_begin());
- auto *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
- auto *LoopCond = matchCondition(LoopTerm, LoopBody);
+ BasicBlock *LoopBody = *CurLoop->block_begin();
+ BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
+ Value *LoopCond = matchCondition(LoopTerm, LoopBody);
if (!LoopCond)
return false;
@@ -1660,6 +1597,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
// the loop, indicating strlen calculation.
auto *IncPtr = LoopLoad->getPointerOperand();
const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
+
if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
return false;
@@ -1700,6 +1638,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
if (!LCSSAPhi || !SE->isSCEVable(LCSSAPhi->getType()))
return false;
+ // This matched the pointer version of the idiom
if (LCSSAPhi->getIncomingValueForBlock(LoopBody) !=
LoopLoad->getPointerOperand())
return false;
@@ -1712,35 +1651,34 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
return false;
// We can now expand the base of the str
- IRBuilder<> Builder(Pre->getTerminator());
+ IRBuilder<> Builder(Preheader->getTerminator());
- PHINode *LoopPhi = &*LoopBody->phis().begin();
- if (!LoopPhi || ++LoopBody->phis().begin() != LoopBody->phis().end())
+ auto LoopPhiRange = LoopBody->phis();
+ if (!hasNItems(LoopPhiRange, 1))
return false;
- Value *PreVal = LoopBody->phis().begin()->getIncomingValueForBlock(Pre);
+ auto *LoopPhi = &*LoopPhiRange.begin();
+ Value *PreVal = LoopPhi->getIncomingValueForBlock(Preheader);
if (!PreVal)
return false;
Value *Expanded = nullptr;
+ Type *ExpandedType = nullptr;
if (auto *GEP = dyn_cast<GetElementPtrInst>(LoopLoad->getPointerOperand())) {
if (GEP->getPointerOperand() != LoopPhi)
return false;
GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(GEP->getSourceElementType(), PreVal,
SmallVector<Value *, 4>(GEP->indices()),
- "newgep", Pre->getTerminator());
+ "newgep", Preheader->getTerminator());
Expanded = NewGEP;
- } else if (LoopLoad->getPointerOperand() == LoopPhi)
+ ExpandedType = NewGEP->getSourceElementType();
+ } else if (LoopLoad->getPointerOperand() == LoopPhi) {
Expanded = PreVal;
+ ExpandedType = LoopLoad->getType();
+ }
if (!Expanded)
return false;
- // Check that the LoopExitBB is calculating the string length and identify
- // the instruction that has the string length calculation
- Instruction *ResInst = getCandidateResInstr(LCSSAPhi, PreVal, OpWidth);
- if (!ResInst)
- return false;
-
// Ensure that the GEP has the correct index if the pointer was modified.
// This can happen when the pointer in the user code, outside the loop,
// walks past a certain pre-checked index of the string.
@@ -1786,11 +1724,12 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
assert(StrLenFunc && "Failed to emit strlen function.");
- // Replace the subtraction instruction by the result of strlen
- ResInst->replaceAllUsesWith(StrLenFunc);
-
- // Remove the loop-exit branch and delete dead instructions
- RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI);
+ // Replace LCSSA Phi use with new pointer to the null terminator
+ SmallVector<Value *, 4> NewBaseIndex{StrLenFunc};
+ GetElementPtrInst *NewEndPtr = GetElementPtrInst::Create(
+ ExpandedType, Expanded, NewBaseIndex, "end", Preheader->getTerminator());
+ LCSSAPhi->replaceAllUsesWith(NewEndPtr);
+ RecursivelyDeleteDeadPHINode(LCSSAPhi);
ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody
? Builder.getFalse()
@@ -1805,7 +1744,7 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
- CurLoop->getStartLoc(), Pre)
+ CurLoop->getStartLoc(), Preheader)
<< "Transformed pointer difference into a call to strlen() function";
});
>From b09384485510cd78c3c5332bed2d900f5af7ae08 Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Thu, 12 Sep 2024 18:38:19 -0400
Subject: [PATCH 4/5] update tests
---
llvm/test/Transforms/LoopIdiom/strlen.ll | 396 +++++++++++++++--------
1 file changed, 270 insertions(+), 126 deletions(-)
diff --git a/llvm/test/Transforms/LoopIdiom/strlen.ll b/llvm/test/Transforms/LoopIdiom/strlen.ll
index 641fce0da8b785..43ed9d0980bc49 100644
--- a/llvm/test/Transforms/LoopIdiom/strlen.ll
+++ b/llvm/test/Transforms/LoopIdiom/strlen.ll
@@ -1,149 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-n32:64"
-target triple = "powerpc64le-unknown-linux-gnu"
-
-define i64 @valid_strlen_i8_test1(ptr %Str) {
-; CHECK-LABEL: @valid_strlen_i8_test1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
-; CHECK: lor.lhs.false:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[STR]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP]], label [[FOR_INC_PREHEADER:%.*]]
-; CHECK: for.inc.preheader:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[STR]], i64 0
-; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[SCEVGEP]])
-; CHECK-NEXT: br label [[FOR_INC:%.*]]
-; CHECK: for.inc:
-; CHECK-NEXT: [[SRC_09:%.*]] = phi ptr [ poison, [[FOR_INC]] ], [ [[STR]], [[FOR_INC_PREHEADER]] ]
-; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 poison, 0
-; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_INC]]
-; CHECK: for.end:
-; CHECK-NEXT: br label [[CLEANUP]]
-; CHECK: cleanup:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[LOR_LHS_FALSE]] ]
-; CHECK-NEXT: ret i64 [[RETVAL_0]]
+declare void @use(ptr)
+
+define i64 @valid_strlen_1(ptr %0) {
+; CHECK-LABEL: define i64 @valid_strlen_1(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[TMP0]])
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[STRLEN]]
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr poison, i64 1
+; CHECK-NEXT: br i1 true, label %[[BB5:.*]], label %[[BB2]]
+; CHECK: [[BB5]]:
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[DOTLCSSA]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: ret i64 [[TMP14]]
;
-entry:
- %tobool = icmp eq ptr %Str, null
- br i1 %tobool, label %cleanup, label %lor.lhs.false
-
-lor.lhs.false: ; preds = %entry
- %0 = load i8, ptr %Str, align 1
- %cmp = icmp eq i8 %0, 0
- br i1 %cmp, label %cleanup, label %for.inc
-
-for.inc: ; preds = %lor.lhs.false, %for.inc
- %Src.09 = phi ptr [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ]
- %incdec.ptr = getelementptr inbounds i8, ptr %Src.09, i64 1
- %.pr = load i8, ptr %incdec.ptr, align 1
- %tobool2 = icmp eq i8 %.pr, 0
- br i1 %tobool2, label %for.end, label %for.inc
-
-for.end: ; preds = %for.inc
- %sub.ptr.lhs.cast = ptrtoint ptr %incdec.ptr to i64
- %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64
- %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
- br label %cleanup
+ br label %2
-cleanup: ; preds = %lor.lhs.false, %entry, %for.end
- %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ]
- ret i64 %retval.0
+2: ; preds = %2, %1
+ %3 = phi ptr [ %0, %1 ], [ %6, %2 ]
+ %4 = load i8, ptr %3, align 1
+ %5 = icmp eq i8 %4, 0
+ %6 = getelementptr inbounds i8, ptr %3, i64 1
+ br i1 %5, label %7, label %2
+
+7: ; preds = %2
+ %8 = ptrtoint ptr %3 to i64
+ %9 = ptrtoint ptr %0 to i64
+ %10 = sub i64 %8, %9
+ ret i64 %10
}
-define i64 @valid_strlen_i8_test2(ptr %Str) {
-; CHECK-LABEL: @valid_strlen_i8_test2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq ptr [[STR:%.*]], null
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[FOR_COND_PREHEADER:%.*]]
-; CHECK: for.cond.preheader:
+
+define i32 @valid_strlen_2(ptr %0) {
+; CHECK-LABEL: define i32 @valid_strlen_2(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null
+; CHECK-NEXT: br i1 [[TMP2]], label %[[BB14:.*]], label %[[BB3:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[BB14]], label %[[DOTPREHEADER:.*]]
+; CHECK: [[_PREHEADER:.*:]]
+; CHECK-NEXT: [[STR:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0
; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]])
-; CHECK-NEXT: br label [[FOR_COND:%.*]]
-; CHECK: for.cond:
-; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: [[STR_ADDR_0_LCSSA:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]]
+; CHECK-NEXT: br label %[[BB6:.*]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi ptr [ poison, %[[BB6]] ], [ [[TMP0]], %[[DOTPREHEADER]] ]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: br i1 true, label %[[BB9:.*]], label %[[BB6]]
+; CHECK: [[BB9]]:
+; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[STR_ADDR_0_LCSSA]] to i64
+; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i32
+; CHECK-NEXT: br label %[[BB14]]
+; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP13]], %[[BB9]] ], [ 0, %[[BB3]] ], [ 0, [[TMP1:%.*]] ]
+; CHECK-NEXT: ret i32 [[TMP15]]
+;
+ %2 = icmp eq ptr %0, null
+ br i1 %2, label %16, label %3
+
+3: ; preds = %1
+ %4 = load i8, ptr %0, align 1
+ %5 = icmp eq i8 %4, 0
+ br i1 %5, label %16, label %6
+
+6: ; preds = %3, %6
+ %7 = phi ptr [ %8, %6 ], [ %0, %3 ]
+ %8 = getelementptr inbounds i8, ptr %7, i64 1
+ %9 = load i8, ptr %8, align 1
+ %10 = icmp eq i8 %9, 0
+ br i1 %10, label %11, label %6
+
+11: ; preds = %6
+ %12 = ptrtoint ptr %8 to i64
+ %13 = ptrtoint ptr %0 to i64
+ %14 = sub i64 %12, %13
+ %15 = trunc i64 %14 to i32
+ br label %16
+
+16: ; preds = %1, %3, %11
+ %17 = phi i32 [ %15, %11 ], [ 0, %3 ], [ 0, %1 ]
+ ret i32 %17
+}
+
+define i64 @valid_strlen_3(ptr %str) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @valid_strlen_3(
+; CHECK-SAME: ptr [[STR:%.*]]) local_unnamed_addr {
+; CHECK-NEXT: [[_PREHEADER:.*:]]
+; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]])
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]]
+; CHECK-NEXT: br label %[[WHILE_COND:.*]]
+; CHECK: [[WHILE_COND]]:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 poison, 0
; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1
-; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_COND]]
-; CHECK: for.end:
-; CHECK-NEXT: br label [[CLEANUP]]
-; CHECK: cleanup:
-; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i64 [[RETVAL_0]]
+; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]]
+; CHECK: [[WHILE_END]]:
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP10]], [[SUB_PTR_RHS_CAST]]
+; CHECK-NEXT: tail call void @use(ptr [[TMP0]])
+; CHECK-NEXT: tail call void @use(ptr [[STR]])
+; CHECK-NEXT: ret i64 [[TMP13]]
;
entry:
- %tobool = icmp eq ptr %Str, null
- br i1 %tobool, label %cleanup, label %for.cond
-
-for.cond: ; preds = %entry, %for.cond
- %Src.0 = phi ptr [ %incdec.ptr, %for.cond ], [ %Str, %entry ]
- %0 = load i8, ptr %Src.0, align 1
- %tobool1 = icmp eq i8 %0, 0
- %incdec.ptr = getelementptr inbounds i8, ptr %Src.0, i64 1
- br i1 %tobool1, label %for.end, label %for.cond
-
-for.end: ; preds = %for.cond
- %sub.ptr.lhs.cast = ptrtoint ptr %Src.0 to i64
- %sub.ptr.rhs.cast = ptrtoint ptr %Str to i64
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %entry
+ %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ]
+ %0 = load i8, ptr %str.addr.0, align 1
+ %cmp.not = icmp eq i8 %0, 0
+ %incdec.ptr = getelementptr inbounds i8, ptr %str.addr.0, i64 1
+ br i1 %cmp.not, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %str to i64
%sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
- br label %cleanup
+ tail call void @use(ptr %str.addr.0)
+ tail call void @use(ptr %str)
+ ret i64 %sub.ptr.sub
+}
- cleanup: ; preds = %entry, %for.end
- %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ]
- ret i64 %retval.0
+; Pre-increment loop guarded only by a null check: the first load is from
+; %0 + 1 and the byte at %0 itself is never tested. The expected output calls
+; strlen on `getelementptr i8, %0, 0` in the preheader and feeds
+; %0 + strlen into the pointer subtraction after the loop.
+; NOTE(review): if the byte at %0 is zero, the original loop keeps scanning from
+; %0 + 1 while strlen(%0) returns 0, so the two results look like they can
+; differ -- confirm whether the call should start at %0 + 1 instead.
+define i64 @valid_strlen_4(ptr %0) {
+; CHECK-LABEL: define i64 @valid_strlen_4(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null
+; CHECK-NEXT: br i1 [[TMP2]], label %[[BB10:.*]], label %[[DOTPREHEADER:.*]]
+; CHECK: [[_PREHEADER:.*:]]
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0
+; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[NEWGEP]])
+; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[NEWGEP]], i64 [[STRLEN]]
+; CHECK-NEXT: br label %[[BB3:.*]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP4:%.*]] = phi ptr [ poison, %[[BB3]] ], [ [[TMP0]], %[[DOTPREHEADER]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: br i1 true, label %[[BB6:.*]], label %[[BB3]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: br label %[[BB10]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP9]], %[[BB6]] ], [ 0, [[TMP1:%.*]] ]
+; CHECK-NEXT: ret i64 [[TMP11]]
+;
+ %2 = icmp eq ptr %0, null
+ br i1 %2, label %12, label %3
+
+3: ; preds = %1, %3
+ %4 = phi ptr [ %5, %3 ], [ %0, %1 ]
+ %5 = getelementptr inbounds i8, ptr %4, i64 1
+ %6 = load i8, ptr %5, align 1
+ %7 = icmp eq i8 %6, 0
+ br i1 %7, label %8, label %3
+
+8: ; preds = %3
+ %9 = ptrtoint ptr %5 to i64
+ %10 = ptrtoint ptr %0 to i64
+ %11 = sub i64 %9, %10
+ br label %12
+
+12: ; preds = %1, %8
+ %13 = phi i64 [ %11, %8 ], [ 0, %1 ]
+ ret i64 %13
}
-define void @invalid_strlen_i8_test3(ptr %s, i32 zeroext %i) {
-; CHECK-LABEL: @invalid_strlen_i8_test3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label [[WHILE_COND:%.*]]
-; CHECK: while.cond:
-; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi ptr [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[S_ADDR_0]], align 1
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[S_ADDR_0]], i64 1
-; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
-; CHECK: while.end:
-; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi ptr [ [[S_ADDR_0]], [[WHILE_COND]] ]
-; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_COND]] ]
-; CHECK-NEXT: store i8 45, ptr [[S_ADDR_0_LCSSA]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 10
-; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: store i8 65, ptr [[INCDEC_PTR1_LCSSA]], align 1
-; CHECK-NEXT: br label [[IF_END9:%.*]]
-; CHECK: if.end:
-; CHECK-NEXT: store i8 66, ptr [[INCDEC_PTR1_LCSSA]], align 1
-; CHECK-NEXT: br label [[IF_END9]]
-; CHECK: if.end9:
-; CHECK-NEXT: ret void
+define i64 @valid_strlen_use(ptr %str) {
+; CHECK-LABEL: define i64 @valid_strlen_use(
+; CHECK-SAME: ptr [[STR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(ptr [[STR]])
+; CHECK-NEXT: [[END:%.*]] = getelementptr i8, ptr [[STR]], i64 [[STRLEN]]
+; CHECK-NEXT: br label %[[WHILE_COND:.*]]
+; CHECK: [[WHILE_COND]]:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i8 poison, 0
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr poison, i64 1
+; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]]
+; CHECK: [[WHILE_END]]:
+; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[STR]] to i64
+; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+; CHECK-NEXT: tail call void @use(ptr noundef nonnull [[END]])
+; CHECK-NEXT: tail call void @use(ptr noundef [[STR]])
+; CHECK-NEXT: ret i64 [[SUB_PTR_SUB]]
;
entry:
br label %while.cond
while.cond: ; preds = %while.cond, %entry
- %s.addr.0 = phi ptr [ %s, %entry ], [ %incdec.ptr1, %while.cond ]
- %0 = load i8, ptr %s.addr.0, align 1
- %tobool.not = icmp eq i8 %0, 0
- %incdec.ptr1 = getelementptr inbounds i8, ptr %s.addr.0, i64 1
- br i1 %tobool.not, label %while.end, label %while.cond
+ %str.addr.0 = phi ptr [ %str, %entry ], [ %incdec.ptr, %while.cond ]
+ %0 = load i8, ptr %str.addr.0, align 1
+ %cmp.not = icmp eq i8 %0, 0
+ %incdec.ptr = getelementptr inbounds i8, ptr %str.addr.0, i64 1
+ br i1 %cmp.not, label %while.end, label %while.cond
while.end: ; preds = %while.cond
- %s.addr.0.lcssa = phi ptr [ %s.addr.0, %while.cond ]
- %incdec.ptr1.lcssa = phi ptr [ %incdec.ptr1, %while.cond ]
- store i8 45, ptr %s.addr.0.lcssa, align 1
- %cmp = icmp ult i32 %i, 10
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %while.end
- store i8 65, ptr %incdec.ptr1.lcssa, align 1
- br label %if.end9
-
-if.end: ; preds = %while.end
- store i8 66, ptr %incdec.ptr1.lcssa, align 1
- br label %if.end9
-
-if.end9: ; preds = %if.end, %if.then
- ret void
+ %sub.ptr.lhs.cast = ptrtoint ptr %str.addr.0 to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %str to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ tail call void @use(ptr noundef nonnull %str.addr.0)
+ tail call void @use(ptr noundef %str)
+ ret i64 %sub.ptr.sub
}
+; Negative test: the loop's load is volatile, and the expected output keeps the
+; loop exactly as written with no strlen call.
+define i64 @invalid_strlen_has_side_effect(ptr %0) {
+; CHECK-LABEL: define i64 @invalid_strlen_has_side_effect(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: br label %[[BB2:.*]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[TMP0]], [[TMP1:%.*]] ], [ [[TMP6:%.*]], %[[BB2]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = load volatile i8, ptr [[TMP3]], align 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6]] = getelementptr inbounds i8, ptr [[TMP3]], i64 1
+; CHECK-NEXT: br i1 [[TMP5]], label %[[BB7:.*]], label %[[BB2]]
+; CHECK: [[BB7]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP3]], %[[BB2]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[DOTLCSSA]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]]
+; CHECK-NEXT: ret i64 [[TMP10]]
+;
+ br label %2
+
+2: ; preds = %2, %1
+ %3 = phi ptr [ %0, %1 ], [ %6, %2 ]
+ %4 = load volatile i8, ptr %3, align 1
+ %5 = icmp eq i8 %4, 0
+ %6 = getelementptr inbounds i8, ptr %3, i64 1
+ br i1 %5, label %7, label %2
+
+7: ; preds = %2
+ %8 = ptrtoint ptr %3 to i64
+ %9 = ptrtoint ptr %0 to i64
+ %10 = sub i64 %8, %9
+ ret i64 %10
+}
+
+; Negative test: the value consumed after the loop is the i32 counter %7
+; (zero-extended to i64), not the walked pointer. The expected output keeps the
+; loop and both phis intact and contains no strlen call.
+define i64 @invalid_strlen_idx_idiom(ptr %0) {
+; CHECK-LABEL: define i64 @invalid_strlen_idx_idiom(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label %[[BB13:.*]], label %[[DOTPREHEADER:.*]]
+; CHECK: [[_PREHEADER:.*:]]
+; CHECK-NEXT: br label %[[BB4:.*]]
+; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP7:%.*]], %[[BB4]] ], [ 0, %[[DOTPREHEADER]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi ptr [ [[TMP8:%.*]], %[[BB4]] ], [ [[TMP0]], %[[DOTPREHEADER]] ]
+; CHECK-NEXT: [[TMP7]] = add nuw nsw i32 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8]] = getelementptr inbounds i8, ptr [[TMP6]], i64 1
+; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 1
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i8 [[TMP9]], 0
+; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB4]]
+; CHECK: [[BB11]]:
+; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], %[[BB4]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = zext nneg i32 [[DOTLCSSA]] to i64
+; CHECK-NEXT: br label %[[BB13]]
+; CHECK: [[BB13]]:
+; CHECK-NEXT: [[TMP14:%.*]] = phi i64 [ 0, [[TMP1:%.*]] ], [ [[TMP12]], %[[BB11]] ]
+; CHECK-NEXT: ret i64 [[TMP14]]
+;
+ %2 = load i8, ptr %0, align 1
+ %3 = icmp eq i8 %2, 0
+ br i1 %3, label %13, label %4
+
+4: ; preds = %1, %4
+ %5 = phi i32 [ %7, %4 ], [ 0, %1 ]
+ %6 = phi ptr [ %8, %4 ], [ %0, %1 ]
+ %7 = add nuw nsw i32 %5, 1
+ %8 = getelementptr inbounds i8, ptr %6, i64 1
+ %9 = load i8, ptr %8, align 1
+ %10 = icmp eq i8 %9, 0
+ br i1 %10, label %11, label %4
+
+11: ; preds = %4
+ %12 = zext nneg i32 %7 to i64
+ br label %13
+
+13: ; preds = %11, %1
+ %14 = phi i64 [ 0, %1 ], [ %12, %11 ]
+ ret i64 %14
+}
+
+
>From 2c2b30ac3cdfa8724986edc47f14dfd12793bd5a Mon Sep 17 00:00:00 2001
From: Henry Jiang <henry.jiang1 at ibm.com>
Date: Fri, 13 Sep 2024 15:12:24 -0400
Subject: [PATCH 5/5] Add wcslen idiom
---
.../Transforms/Scalar/LoopIdiomRecognize.h | 3 +
.../llvm/Transforms/Utils/BuildLibCalls.h | 6 ++
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 36 +++++++---
llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 9 +++
llvm/test/Transforms/LoopIdiom/wcslen16.ll | 66 +++++++++++++++++
llvm/test/Transforms/LoopIdiom/wcslen32.ll | 70 +++++++++++++++++++
6 files changed, 181 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/wcslen16.ll
create mode 100644 llvm/test/Transforms/LoopIdiom/wcslen32.ll
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index 3a9f016ce9bd60..241a3fc1093607 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -37,6 +37,9 @@ struct DisableLIRP {
/// When true, Strlen is disabled.
static bool Strlen;
+
+ /// When true, Wcslen is disabled.
+ static bool Wcslen;
};
/// Performs Loop Idiom Recognize Pass.
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index a8fb38e7260043..50f695dbe6c076 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -93,6 +93,12 @@ namespace llvm {
Value *emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
const TargetLibraryInfo *TLI);
+ /// Emit a call to the wcslen library function to the builder \p B, for the
+ /// pointer \p Ptr. Ptr is required to be non-null and of some pointer type
+ /// (the definition asserts this), and the return value has 'size_t' type.
+ /// \p DL is not read by the definition in BuildLibCalls.cpp.
+ Value *emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
+
/// Emit a call to the strdup function to the builder, for the specified
/// pointer. Ptr is required to be some pointer type, and the return value has
/// 'i8*' type.
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 20ecc2d83b2b05..c1af2aa98fc990 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -136,6 +136,14 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Strlen), cl::init(false),
cl::ReallyHidden);
+// Out-of-line definition of DisableLIRP::Wcslen, followed by the hidden
+// command-line option that writes to it through cl::location (initial value
+// false). recognizeAndInsertStrLen() consults the option before converting a
+// loop whose element width is 16 bits or more.
+bool DisableLIRP::Wcslen;
+static cl::opt<bool, true>
+ DisableLIRPWcslen("disable-" DEBUG_TYPE "-wcslen",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to wcslen."),
+ cl::location(DisableLIRP::Wcslen), cl::init(false),
+ cl::ReallyHidden);
+
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling"
@@ -1606,15 +1614,19 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
if (!Step)
return false;
- unsigned int ConstIntValue = 0;
+ unsigned int StepSize = 0;
if (ConstantInt *CI = dyn_cast<ConstantInt>(Step->getValue()))
- ConstIntValue = CI->getZExtValue();
+ StepSize = CI->getZExtValue();
unsigned OpWidth = OperandType->getIntegerBitWidth();
- if (OpWidth != ConstIntValue * 8)
+ unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
+ if (OpWidth != StepSize * 8)
return false;
- if (OpWidth != 8)
+ if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
return false;
+ if (OpWidth >= 16)
+ if (OpWidth != WcharSize * 8 || DisableLIRPWcslen)
+ return false;
// Scan every instruction in the loop to ensure there are no side effects.
for (auto &I : *LoopBody)
@@ -1666,12 +1678,11 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
if (auto *GEP = dyn_cast<GetElementPtrInst>(LoopLoad->getPointerOperand())) {
if (GEP->getPointerOperand() != LoopPhi)
return false;
- GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(GEP->getSourceElementType(), PreVal,
- SmallVector<Value *, 4>(GEP->indices()),
- "newgep", Preheader->getTerminator());
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+ LoopLoad->getType(), PreVal, SmallVector<Value *, 4>(GEP->indices()),
+ "newgep", Preheader->getTerminator());
Expanded = NewGEP;
- ExpandedType = NewGEP->getSourceElementType();
+ ExpandedType = LoopLoad->getType();
} else if (LoopLoad->getPointerOperand() == LoopPhi) {
Expanded = PreVal;
ExpandedType = LoopLoad->getType();
@@ -1718,8 +1729,15 @@ bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
Value *StrLenFunc = nullptr;
switch (OpWidth) {
case 8:
+ if (!TLI->has(LibFunc_strlen))
+ return false;
StrLenFunc = emitStrLen(Expanded, Builder, *DL, TLI);
break;
+ case 16:
+ case 32:
+ if (!TLI->has(LibFunc_wcslen))
+ return false;
+ StrLenFunc = emitWcsLen(Expanded, Builder, *DL, TLI);
}
assert(StrLenFunc && "Failed to emit strlen function.");
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index e039457f313b29..cfda42dd7f6556 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1536,6 +1536,15 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI);
}
+/// Build a call to wcslen on \p Ptr through \p B and hand back whatever
+/// emitLibCall returns. \p Ptr must be a non-null, pointer-typed value;
+/// \p DL is accepted for signature parity but is not read here.
+Value *llvm::emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL,
+                        const TargetLibraryInfo *TLI) {
+  assert(Ptr && Ptr->getType()->isPointerTy() &&
+         "Argument to wcslen intrinsic must be a pointer.");
+  // Result type comes from getSizeTTy; the single parameter is a plain pointer.
+  Type *ResultTy = getSizeTTy(B, TLI);
+  Type *ArgTy = B.getPtrTy();
+  return emitLibCall(LibFunc_wcslen, ResultTy, ArgTy, Ptr, B, TLI);
+}
+
Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
Type *CharPtrTy = B.getPtrTy();
diff --git a/llvm/test/Transforms/LoopIdiom/wcslen16.ll b/llvm/test/Transforms/LoopIdiom/wcslen16.ll
new file mode 100644
index 00000000000000..6c140ddf90d4e3
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/wcslen16.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The loop loads i16 and advances the pointer by 2 bytes per iteration, after
+; a null check and a check that the element at %src is non-zero. With the
+; wchar_size = 2 module flag at the end of this file, the expected output
+; contains a wcslen call in the preheader.
+; NOTE(review): the expected call argument is `getelementptr i16, %src, -1`,
+; i.e. one element *before* %src, although the loop's first load is from
+; %src + 2 bytes -- this looks like a wrong start address captured by the
+; autogenerated checks; confirm before relying on it.
+; NOTE(review): the sibling test in wcslen32.ll is named valid_wcslen32;
+; consider valid_wcslen16 here for consistency.
+define i64 @valid_strlen16(ptr %src) {
+; CHECK-LABEL: define i64 @valid_strlen16(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[SRC]], null
+; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[LOR_LHS_FALSE:.*]]
+; CHECK: [[LOR_LHS_FALSE]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]]
+; CHECK: [[WHILE_COND_PREHEADER]]:
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 -1
+; CHECK-NEXT: [[WCSLEN:%.*]] = call i64 @wcslen(ptr [[NEWGEP]])
+; CHECK-NEXT: [[END:%.*]] = getelementptr i16, ptr [[NEWGEP]], i64 [[WCSLEN]]
+; CHECK-NEXT: br label %[[WHILE_COND:.*]]
+; CHECK: [[WHILE_COND]]:
+; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ poison, %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ]
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 poison, 0
+; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]]
+; CHECK: [[WHILE_END]]:
+; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64
+; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+; CHECK-NEXT: [[SUB_PTR_DIV:%.*]] = ashr exact i64 [[SUB_PTR_SUB]], 1
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_DIV]], %[[WHILE_END]] ], [ 0, %[[LOR_LHS_FALSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: ret i64 [[RETVAL_0]]
+;
+entry:
+ %cmp = icmp eq ptr %src, null
+ br i1 %cmp, label %return, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %0 = load i16, ptr %src, align 2
+ %cmp1 = icmp eq i16 %0, 0
+ br i1 %cmp1, label %return, label %while.cond
+
+while.cond: ; preds = %lor.lhs.false, %while.cond
+ %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %lor.lhs.false ]
+ %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 2
+ %1 = load i16, ptr %curr.0, align 2
+ %tobool.not = icmp eq i16 %1, 0
+ br i1 %tobool.not, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %sub.ptr.lhs.cast = ptrtoint ptr %curr.0 to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %src to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 1
+ br label %return
+
+return: ; preds = %entry, %lor.lhs.false, %while.end
+ %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ]
+ ret i64 %retval.0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"wchar_size", i32 2}
+
diff --git a/llvm/test/Transforms/LoopIdiom/wcslen32.ll b/llvm/test/Transforms/LoopIdiom/wcslen32.ll
new file mode 100644
index 00000000000000..fad4c52078967f
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/wcslen32.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='loop-idiom' < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The loop loads i32 and advances the pointer by 4 bytes per iteration, after
+; a null check and a check that the element at %src is non-zero. With the
+; wchar_size = 4 module flag at the end of this file, the expected output
+; contains a wcslen call in the preheader.
+; NOTE(review): the expected call argument is `getelementptr i32, %src, -3`,
+; i.e. three elements *before* %src, although the loop's first load is from
+; %src + 4 bytes -- this looks like a wrong start address captured by the
+; autogenerated checks; confirm before relying on it.
+define i64 @valid_wcslen32(ptr %src) {
+; CHECK-LABEL: define i64 @valid_wcslen32(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[SRC]], null
+; CHECK-NEXT: br i1 [[CMP]], label %[[RETURN:.*]], label %[[LOR_LHS_FALSE:.*]]
+; CHECK: [[LOR_LHS_FALSE]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[CMP1]], label %[[RETURN]], label %[[WHILE_COND_PREHEADER:.*]]
+; CHECK: [[WHILE_COND_PREHEADER]]:
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 -3
+; CHECK-NEXT: [[WCSLEN:%.*]] = call i64 @wcslen(ptr [[NEWGEP]])
+; CHECK-NEXT: [[END:%.*]] = getelementptr i32, ptr [[NEWGEP]], i64 [[WCSLEN]]
+; CHECK-NEXT: br label %[[WHILE_COND:.*]]
+; CHECK: [[WHILE_COND]]:
+; CHECK-NEXT: [[SRC_PN:%.*]] = phi ptr [ poison, %[[WHILE_COND]] ], [ [[SRC]], %[[WHILE_COND_PREHEADER]] ]
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 poison, 0
+; CHECK-NEXT: br i1 true, label %[[WHILE_END:.*]], label %[[WHILE_COND]]
+; CHECK: [[WHILE_END]]:
+; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[SRC]] to i64
+; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]]
+; CHECK-NEXT: [[SUB_PTR_DIV:%.*]] = ashr exact i64 [[SUB_PTR_SUB]], 2
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_DIV]], %[[WHILE_END]] ], [ 0, %[[LOR_LHS_FALSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: ret i64 [[RETVAL_0]]
+;
+entry:
+ %cmp = icmp eq ptr %src, null
+ br i1 %cmp, label %return, label %lor.lhs.false
+
+lor.lhs.false: ; preds = %entry
+ %0 = load i32, ptr %src, align 4
+ %cmp1 = icmp eq i32 %0, 0
+ br i1 %cmp1, label %return, label %while.cond.preheader
+
+while.cond.preheader: ; preds = %lor.lhs.false
+ br label %while.cond
+
+while.cond: ; preds = %while.cond.preheader, %while.cond
+ %src.pn = phi ptr [ %curr.0, %while.cond ], [ %src, %while.cond.preheader ]
+ %curr.0 = getelementptr inbounds i8, ptr %src.pn, i64 4
+ %1 = load i32, ptr %curr.0, align 4
+ %tobool.not = icmp eq i32 %1, 0
+ br i1 %tobool.not, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ %curr.0.lcssa = phi ptr [ %curr.0, %while.cond ]
+ %sub.ptr.lhs.cast = ptrtoint ptr %curr.0.lcssa to i64
+ %sub.ptr.rhs.cast = ptrtoint ptr %src to i64
+ %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2
+ br label %return
+
+return: ; preds = %entry, %lor.lhs.false, %while.end
+ %retval.0 = phi i64 [ %sub.ptr.div, %while.end ], [ 0, %lor.lhs.false ], [ 0, %entry ]
+ ret i64 %retval.0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"wchar_size", i32 4}
+
More information about the llvm-commits
mailing list