[llvm] [LowerMemIntrinsics] Avoid emitting unreachable loops in insertLoopExpansion (PR #185900)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 08:26:01 PDT 2026
https://github.com/ritter-x2a created https://github.com/llvm/llvm-project/pull/185900
This patch refactors insertLoopExpansion and allows it to skip loops that are
statically known to be unreachable and to make conditional branches with a
statically known condition unconditional. Those situations arise when the loop
count is a known constant.
These cases don't occur at the existing call sites in the memcpy and memset
lowering, since they have custom handling for constant loop sizes anyway. They
will however occur in a follow-up patch that uses insertLoopExpansion for
memset.pattern, where similar custom handling for constant loop sizes would
make less sense.
This is mostly NFC with the current use except for slight changes in the branch
weight computation from profiling data (which cause the included test
changes).
From 30f7c7653fcaf1227a14186bc9bbe57d9b35853d Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Wed, 11 Mar 2026 08:12:30 -0400
Subject: [PATCH] [LowerMemIntrinsics] Avoid emitting unreachable loops in
insertLoopExpansion
This patch refactors insertLoopExpansion and allows it to skip loops that are
statically known to be unreachable and to make conditional branches with a
statically known condition unconditional. Those situations arise when the loop
count is a known constant.
These cases don't occur at the existing call sites in the memcpy and memset
lowering, since they have custom handling for constant loop sizes anyway. They
will however occur in a follow-up patch that uses insertLoopExpansion for
memset.pattern, where similar custom handling for constant loop sizes would
make less sense.
This is mostly NFC with the current use except for slight changes in the branch
weight computation from profiling data (which cause the included test
changes).
---
.../Transforms/Utils/LowerMemIntrinsics.cpp | 277 ++++++++++++------
.../X86/memcpy-inline-non-constant-len.ll | 10 +-
.../X86/memset-inline-non-constant-len.ll | 6 +-
3 files changed, 201 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 7623f3b9a6c08..da7260cdbac31 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -124,17 +124,26 @@ std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
/// to \p MainLoopStep.
/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
+///
+/// If provided, \p ExpectedUnits is used as the expected number of units
+/// handled by the loop expansion when computing branch weights.
static LoopExpansionInfo
insertLoopExpansion(Instruction *InsertBefore, Value *Len,
unsigned MainLoopStep, unsigned ResidualLoopStep,
StringRef BBNamePrefix,
- std::optional<uint64_t> AverageTripCount) {
+ std::optional<uint64_t> ExpectedUnits) {
assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
"ResidualLoopStep must divide MainLoopStep if specified");
assert(ResidualLoopStep <= MainLoopStep &&
"ResidualLoopStep cannot be larger than MainLoopStep");
assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
LoopExpansionInfo LEI;
+
+ // If the length is known to be zero, there is nothing to do.
+ if (auto *CLen = dyn_cast<ConstantInt>(Len))
+ if (CLen->isZero())
+ return LEI;
+
BasicBlock *PreLoopBB = InsertBefore->getParent();
BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
InsertBefore, BBNamePrefix + "-post-expansion");
@@ -149,12 +158,21 @@ insertLoopExpansion(Instruction *InsertBefore, Value *Len,
Type *LenType = Len->getType();
IntegerType *ILenType = cast<IntegerType>(LenType);
ConstantInt *CIMainLoopStep = ConstantInt::get(ILenType, MainLoopStep);
+ ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
+
+ // We can avoid conditional branches and/or entire loops if we know any of the
+ // following:
+ // - that the main loop must be executed at least once
+ // - that the main loop will not be executed at all
+ // - that the residual loop must be executed at least once
+ // - that the residual loop will not be executed at all
+ bool MustTakeMainLoop = false;
+ bool MayTakeMainLoop = true;
+ bool MustTakeResidualLoop = false;
+ bool MayTakeResidualLoop = true;
Value *LoopUnits = Len;
Value *ResidualUnits = nullptr;
- // We can make a conditional branch unconditional if we know that the
- // MainLoop must be executed at least once.
- bool MustTakeMainLoop = false;
if (MainLoopStep != 1) {
if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
uint64_t TotalUnits = CLen->getZExtValue();
@@ -163,11 +181,12 @@ insertLoopExpansion(Instruction *InsertBefore, Value *Len,
LoopUnits = ConstantInt::get(LenType, LoopEndCount);
ResidualUnits = ConstantInt::get(LenType, ResidualCount);
MustTakeMainLoop = LoopEndCount > 0;
- // As an optimization, we could skip generating the residual loop if
- // ResidualCount is known to be 0. However, current uses of this function
- // don't request a residual loop if the length is constant (they generate
- // a (potentially empty) sequence of loads and stores instead), so this
- // optimization would have no effect here.
+ MayTakeMainLoop = MustTakeMainLoop;
+ MustTakeResidualLoop = ResidualCount > 0;
+ MayTakeResidualLoop = MustTakeResidualLoop;
+ // TODO: This could also use known bits to check if a non-constant loop
+ // count is guaranteed to be a multiple of MainLoopStep, in which case we
+ // could omit the residual loop. It's unclear if that is worthwhile.
} else {
ResidualUnits = getRuntimeLoopRemainder(PreLoopBuilder, Len,
CIMainLoopStep, MainLoopStep);
@@ -176,113 +195,195 @@ insertLoopExpansion(Instruction *InsertBefore, Value *Len,
}
} else if (auto *CLen = dyn_cast<ConstantInt>(Len)) {
MustTakeMainLoop = CLen->getZExtValue() > 0;
+ MayTakeMainLoop = MustTakeMainLoop;
}
- BasicBlock *MainLoopBB = BasicBlock::Create(
- Ctx, BBNamePrefix + "-expansion-main-body", ParentFunc, PostLoopBB);
- IRBuilder<> LoopBuilder(MainLoopBB);
- LoopBuilder.SetCurrentDebugLocation(DbgLoc);
+ // The case where both loops are omitted (i.e., the length is known zero) is
+ // already handled at the beginning of this function.
+ assert((MayTakeMainLoop || MayTakeResidualLoop) &&
+ "At least one of the loops must be generated");
- PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
- LEI.MainLoopIndex = LoopIndex;
- LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
+ BasicBlock *MainLoopBB = nullptr;
+ BranchInst *MainLoopBr = nullptr;
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(LenType, MainLoopStep));
- LoopIndex->addIncoming(NewIndex, MainLoopBB);
+ // Construct the main loop unless we statically know that it is not taken.
+ if (MayTakeMainLoop) {
+ MainLoopBB = BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-main-body",
+ ParentFunc, PostLoopBB);
+ IRBuilder<> LoopBuilder(MainLoopBB);
+ LoopBuilder.SetCurrentDebugLocation(DbgLoc);
+
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(LenType, 2, "loop-index");
+ LEI.MainLoopIndex = LoopIndex;
+ LoopIndex->addIncoming(ConstantInt::get(LenType, 0U), PreLoopBB);
+
+ Value *NewIndex = LoopBuilder.CreateAdd(
+ LoopIndex, ConstantInt::get(LenType, MainLoopStep));
+ LoopIndex->addIncoming(NewIndex, MainLoopBB);
+
+ // One argument of the addition is a loop-variant PHI, so it must be an
+ // Instruction (i.e., it cannot be a Constant).
+ LEI.MainLoopIP = cast<Instruction>(NewIndex);
+
+ // Stay in the MainLoop until we have handled all the LoopUnits. The False
+ // target is adjusted below if a residual loop is generated.
+ MainLoopBr = LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
- // One argument of the addition is a loop-variant PHI, so it must be an
- // Instruction (i.e., it cannot be a Constant).
- LEI.MainLoopIP = cast<Instruction>(NewIndex);
+ if (ExpectedUnits.has_value()) {
+ uint64_t BackedgeTakenCount = ExpectedUnits.value() / MainLoopStep;
+ if (BackedgeTakenCount > 0)
+ BackedgeTakenCount -= 1; // The last iteration goes to the False target.
+ MDBuilder MDB(ParentFunc->getContext());
+ setFittedBranchWeights(*MainLoopBr, {BackedgeTakenCount, 1},
+ /*IsExpected=*/false);
+ } else {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*MainLoopBr, DEBUG_TYPE);
+ }
+ }
- if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
- // Loop body for the residual accesses.
- BasicBlock *ResLoopBB =
+ // Construct the residual loop if it is requested by the caller unless we
+ // statically know that it won't be taken.
+ bool ResidualLoopRequested =
+ ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep;
+ BasicBlock *ResidualLoopBB = nullptr;
+ BasicBlock *ResidualCondBB = nullptr;
+ if (ResidualLoopRequested && MayTakeResidualLoop) {
+ ResidualLoopBB =
BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-body",
PreLoopBB->getParent(), PostLoopBB);
- // BB to check if the residual loop is needed.
- BasicBlock *ResidualCondBB =
- BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-cond",
- PreLoopBB->getParent(), ResLoopBB);
-
- // Enter the MainLoop unless no main loop iteration is required.
- ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
- if (MustTakeMainLoop)
- PreLoopBuilder.CreateBr(MainLoopBB);
- else {
- auto *BR = PreLoopBuilder.CreateCondBr(
- PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
- ResidualCondBB);
- if (AverageTripCount.has_value()) {
- MDBuilder MDB(ParentFunc->getContext());
- setFittedBranchWeights(*BR,
- {AverageTripCount.value() % MainLoopStep, 1},
- /*IsExpected=*/false);
+
+ // The residual loop body is either reached from the ResidualCondBB (which
+ // checks if the residual loop needs to be executed), from the main loop
+ // body if we know statically that the residual must be executed, or from
+ // the pre-loop BB (conditionally or unconditionally) if the main loop is
+ // omitted.
+ BasicBlock *PredOfResLoopBody = PreLoopBB;
+ if (MainLoopBB) {
+ // If it's statically known that the residual must be executed, we don't
+ // need to create a preheader BB.
+ if (MustTakeResidualLoop) {
+ MainLoopBr->setSuccessor(1, ResidualLoopBB);
+ PredOfResLoopBody = MainLoopBB;
} else {
- setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+ // Construct a preheader BB to check if the residual loop is executed.
+ ResidualCondBB =
+ BasicBlock::Create(Ctx, BBNamePrefix + "-expansion-residual-cond",
+ PreLoopBB->getParent(), ResidualLoopBB);
+
+ // Determine if we need to branch to the residual loop or bypass it.
+ IRBuilder<> RCBuilder(ResidualCondBB);
+ RCBuilder.SetCurrentDebugLocation(DbgLoc);
+ auto *BR =
+ RCBuilder.CreateCondBr(RCBuilder.CreateICmpNE(ResidualUnits, Zero),
+ ResidualLoopBB, PostLoopBB);
+ if (ExpectedUnits.has_value()) {
+ MDBuilder MDB(ParentFunc->getContext());
+ BR->setMetadata(LLVMContext::MD_prof,
+ MDB.createLikelyBranchWeights());
+ } else {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+ }
+
+ MainLoopBr->setSuccessor(1, ResidualCondBB);
+ PredOfResLoopBody = ResidualCondBB;
}
}
- PreLoopBB->getTerminator()->eraseFromParent();
-
- // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
- // the residual condition BB.
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
- MainLoopBB, ResidualCondBB);
-
- // Determine if we need to branch to the residual loop or bypass it.
- IRBuilder<> RCBuilder(ResidualCondBB);
- RCBuilder.SetCurrentDebugLocation(DbgLoc);
- RCBuilder.CreateCondBr(RCBuilder.CreateICmpNE(ResidualUnits, Zero),
- ResLoopBB, PostLoopBB);
- IRBuilder<> ResBuilder(ResLoopBB);
+ IRBuilder<> ResBuilder(ResidualLoopBB);
ResBuilder.SetCurrentDebugLocation(DbgLoc);
PHINode *ResidualIndex =
ResBuilder.CreatePHI(LenType, 2, "residual-loop-index");
- ResidualIndex->addIncoming(Zero, ResidualCondBB);
+ ResidualIndex->addIncoming(Zero, PredOfResLoopBody);
// Add the offset at the end of the main loop to the loop counter of the
- // residual loop to get the proper index.
- Value *FullOffset = ResBuilder.CreateAdd(LoopUnits, ResidualIndex);
- LEI.ResidualLoopIndex = FullOffset;
+ // residual loop to get the proper index. If the main loop was omitted, we
+ // can also omit the addition.
+ if (MainLoopBB)
+ LEI.ResidualLoopIndex = ResBuilder.CreateAdd(LoopUnits, ResidualIndex);
+ else
+ LEI.ResidualLoopIndex = ResidualIndex;
Value *ResNewIndex = ResBuilder.CreateAdd(
ResidualIndex, ConstantInt::get(LenType, ResidualLoopStep));
- ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
+ ResidualIndex->addIncoming(ResNewIndex, ResidualLoopBB);
// One argument of the addition is a loop-variant PHI, so it must be an
// Instruction (i.e., it cannot be a Constant).
LEI.ResidualLoopIP = cast<Instruction>(ResNewIndex);
// Stay in the residual loop until all ResidualUnits are handled.
- ResBuilder.CreateCondBr(
- ResBuilder.CreateICmpULT(ResNewIndex, ResidualUnits), ResLoopBB,
+ BranchInst *BR = ResBuilder.CreateCondBr(
+ ResBuilder.CreateICmpULT(ResNewIndex, ResidualUnits), ResidualLoopBB,
PostLoopBB);
+
+ if (ExpectedUnits.has_value()) {
+ uint64_t BackedgeTakenCount =
+ (ExpectedUnits.value() % MainLoopStep) / ResidualLoopStep;
+ if (BackedgeTakenCount > 0)
+ BackedgeTakenCount -= 1; // The last iteration goes to the False target.
+ MDBuilder MDB(ParentFunc->getContext());
+ setFittedBranchWeights(*BR, {BackedgeTakenCount, 1},
+ /*IsExpected=*/false);
+ } else {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+ }
+ }
+
+ // Create the branch in the pre-loop block.
+ if (MustTakeMainLoop) {
+ // Go unconditionally to the main loop if it's statically known that it must
+ // be executed.
+ assert(MainLoopBB);
+ PreLoopBuilder.CreateBr(MainLoopBB);
+ } else if (!MainLoopBB && ResidualLoopBB) {
+ if (MustTakeResidualLoop) {
+ // If the main loop is omitted and the residual loop is statically known
+ // to be executed, go there unconditionally.
+ PreLoopBuilder.CreateBr(ResidualLoopBB);
+ } else {
+ // If the main loop is omitted and we don't know if the residual loop is
+ // executed, go there if necessary. The PreLoopBB takes the role of the
+ // preheader for the residual loop in this case.
+ auto *BR = PreLoopBuilder.CreateCondBr(
+ PreLoopBuilder.CreateICmpNE(ResidualUnits, Zero), ResidualLoopBB,
+ PostLoopBB);
+ if (ExpectedUnits.has_value()) {
+ MDBuilder MDB(ParentFunc->getContext());
+ BR->setMetadata(LLVMContext::MD_prof, MDB.createLikelyBranchWeights());
+ } else {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+ }
+ }
} else {
- // There is no need for a residual loop after the main loop. We do however
- // need to patch up the control flow by creating the terminators for the
- // preloop block and the main loop.
+ // Otherwise, go conditionally to the main loop or its successor.
+ // If there is no residual loop, the successor is the post-loop BB.
+ BasicBlock *FalseBB = PostLoopBB;
+ if (ResidualCondBB) {
+ // If we constructed a pre-header for the residual loop, that is the
+ // successor.
+ FalseBB = ResidualCondBB;
+ } else if (ResidualLoopBB) {
+ // If there is a residual loop but the preheader is omitted (because the
+ // residual loop is statically known to be executed), the successor
+ // is the residual loop body.
+ assert(MustTakeResidualLoop);
+ FalseBB = ResidualLoopBB;
+ }
- // Enter the MainLoop unless no main loop iteration is required.
- if (MustTakeMainLoop) {
- PreLoopBuilder.CreateBr(MainLoopBB);
+ auto *BR = PreLoopBuilder.CreateCondBr(
+ PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB, FalseBB);
+
+ if (ExpectedUnits.has_value()) {
+ MDBuilder MDB(ParentFunc->getContext());
+ BR->setMetadata(LLVMContext::MD_prof, MDB.createLikelyBranchWeights());
} else {
- ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
- MDBuilder B(ParentFunc->getContext());
- PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
- MainLoopBB, PostLoopBB,
- B.createLikelyBranchWeights());
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
}
- PreLoopBB->getTerminator()->eraseFromParent();
- // Stay in the MainLoop until we have handled all the LoopUnits.
- auto *Br = LoopBuilder.CreateCondBr(
- LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
- if (AverageTripCount.has_value())
- setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
- /*IsExpected=*/false);
- else
- setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
}
+ // Delete the unconditional branch inserted by splitBasicBlock.
+ PreLoopBB->getTerminator()->eraseFromParent();
+
return LEI;
}
@@ -330,6 +431,8 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
LoopExpansionInfo LEI =
insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
"static-memcpy", AverageTripCount);
+ assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
+ "Main loop should be generated for non-zero loop count");
// Fill MainLoopBB
IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -450,6 +553,8 @@ void llvm::createMemCpyLoopUnknownSize(
LoopExpansionInfo LEI =
insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
"dynamic-memcpy", AverageTripCount);
+ assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
+ "Main loop should be generated for unknown size copy");
// Fill MainLoopBB
IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -1087,6 +1192,8 @@ createMemSetLoopKnownSize(Instruction *InsertBefore, Value *DstAddr,
// straight-line code.
LoopExpansionInfo LEI = insertLoopExpansion(
InsertBefore, Len, LoopOpSize, 0, "static-memset", AverageTripCount);
+ assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
+ "Main loop should be generated for non-zero loop count");
// Fill MainLoopBB
IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -1174,6 +1281,8 @@ createMemSetLoopUnknownSize(Instruction *InsertBefore, Value *DstAddr,
LoopExpansionInfo LEI =
insertLoopExpansion(InsertBefore, Len, LoopOpSize, ResidualLoopOpSize,
"dynamic-memset", AverageTripCount);
+ assert(LEI.MainLoopIP && LEI.MainLoopIndex &&
+ "Main loop should be generated for unknown size memset");
// Fill MainLoopBB
IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
index 0ca0bb2421c8b..29f32164054cb 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
@@ -27,10 +27,10 @@ define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind !prof !0 {
; CHECK-NEXT: store i8 [[TMP3]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5]] = add i64 [[LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION]], !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION]], !prof [[PROF2]]
; CHECK: [[DYNAMIC_MEMCPY_POST_EXPANSION]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1:.*]], !prof [[PROF2]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1:.*]], !prof [[PROF3:![0-9]+]]
; CHECK: [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]]:
; CHECK-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[DYNAMIC_MEMCPY_POST_EXPANSION]] ], [ [[TMP11:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX3]]
@@ -56,7 +56,7 @@ define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind !prof !0 {
;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
; CHECK: [[PROF1]] = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
-; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
-; CHECK: [[PROF4]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[PROF2]] = !{!"unknown", !"lower-mem-intrinsics"}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 2, i32 1}
;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
index ff9f662aaee9a..971b15d086d31 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
@@ -28,14 +28,14 @@ define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind !prof !0 {
; CHECK-NEXT: br i1 [[TMP5]], label %[[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY]], label %[[DYNAMIC_MEMSET_POST_LOOP_EXPANSION]], !prof [[PROF2:![0-9]+]]
; CHECK: [[DYNAMIC_MEMSET_POST_LOOP_EXPANSION]]:
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT: br i1 [[TMP6]], label %[[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMSET_POST_LOOP_EXPANSION1:.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMSET_POST_LOOP_EXPANSION1:.*]], !prof [[PROF3:![0-9]+]]
; CHECK: [[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY2]]:
; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ 0, %[[DYNAMIC_MEMSET_POST_LOOP_EXPANSION]] ], [ [[TMP9:%.*]], %[[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: store volatile i8 [[VALUE]], ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP7]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY2]], label %[[DYNAMIC_MEMSET_POST_LOOP_EXPANSION1]], !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[DYNAMIC_MEMSET_LOOP_EXPANSION_MAIN_BODY2]], label %[[DYNAMIC_MEMSET_POST_LOOP_EXPANSION1]], !prof [[PROF3]]
; CHECK: [[DYNAMIC_MEMSET_POST_LOOP_EXPANSION1]]:
; CHECK-NEXT: ret void
;
@@ -52,6 +52,6 @@ define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind !prof !0 {
;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1048575, i32 1}
-; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 2, i32 1}
; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
;.
More information about the llvm-commits
mailing list