[llvm-branch-commits] [llvm] Memset (PR #174490)
Mircea Trofin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 6 10:45:46 PST 2026
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/174490
From 3328edeec35c1bd7f458e7c5aec4cfe9526cf495 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 15 Dec 2025 17:25:57 -0800
Subject: [PATCH] Memset
---
.../Transforms/Utils/LowerMemIntrinsics.h | 6 +-
.../Transforms/Utils/LowerMemIntrinsics.cpp | 148 +++++++++++++-----
.../X86/memcpy-inline-non-constant-len.ll | 49 +++---
.../X86/memset-inline-non-constant-len.ll | 28 +++-
4 files changed, 166 insertions(+), 65 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index d4e72a60fc1ea..1ec150330484d 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -38,7 +38,8 @@ LLVM_ABI void createMemCpyLoopUnknownSize(
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile,
bool CanOverlap, const TargetTransformInfo &TTI,
- std::optional<unsigned> AtomicSize = std::nullopt);
+ std::optional<unsigned> AtomicSize = std::nullopt,
+ std::optional<uint64_t> AverageTripCount = std::nullopt);
/// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
/// compile time constant. Loop is inserted at \p InsertBefore.
@@ -46,7 +47,8 @@ LLVM_ABI void createMemCpyLoopKnownSize(
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile,
bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
- std::optional<uint32_t> AtomicCpySize = std::nullopt);
+ std::optional<uint32_t> AtomicCpySize = std::nullopt,
+ std::optional<uint64_t> AverageTripCount = std::nullopt);
/// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy,
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 4ab99edd64baa..c6f9e8c32b90d 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -12,15 +12,23 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <limits>
#include <optional>
#define DEBUG_TYPE "lower-mem-intrinsics"
using namespace llvm;
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
/// \returns \p Len urem \p OpSize, checking for optimization opportunities.
/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
@@ -60,6 +68,28 @@ struct LoopExpansionInfo {
/// required.
Value *ResidualLoopIndex = nullptr;
};
+
+/// Estimate the average length \p I operates on, from which callers derive
+/// branch weights for the loops the intrinsic is expanded into.
+///
+/// \returns std::nullopt when \c ProfcheckDisableMetadataFixes is set, when
+/// the enclosing function has no entry count or a zero one, or when no
+/// \c IPVK_MemOPSize value-profile total is attached to \p I. If
+/// \c getLengthInBytes() yields a value, that value is returned directly.
+/// Otherwise the result is the count-weighted mean of the profiled values,
+/// rounded to the nearest integer (ties round up).
+std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
+  if (ProfcheckDisableMetadataFixes)
+    return std::nullopt;
+  // Without a non-zero entry count the function is treated as unprofiled.
+  if (std::optional<Function::ProfileCount> EC =
+          I.getFunction()->getEntryCount();
+      !EC || !EC->getCount())
+    return std::nullopt;
+  if (const auto Len = I.getLengthInBytes())
+    return Len->getZExtValue();
+  uint64_t Total = 0;
+  auto ProfData =
+      getValueProfDataFromInst(I, InstrProfValueKind::IPVK_MemOPSize,
+                               std::numeric_limits<uint32_t>::max(), Total);
+  if (!Total)
+    return std::nullopt;
+  uint64_t WeightedSum = 0;
+  for (const auto &KV : ProfData)
+    WeightedSum += KV.Count * KV.Value;
+  // Round to nearest in pure integer arithmetic. This avoids the round-trip
+  // through double (and the dependency on <cmath>'s std::round) and stays
+  // exact for sums above 2^53. Comparing the remainder against
+  // ceil(Total / 2) cannot overflow, unlike adding Total / 2 to the sum.
+  const uint64_t Quotient = WeightedSum / Total;
+  const uint64_t Remainder = WeightedSum % Total;
+  return Quotient + (Remainder >= Total - Total / 2 ? 1 : 0);
+}
+
} // namespace
/// Insert the control flow and loop counters for a memcpy/memset loop
@@ -94,10 +124,11 @@ struct LoopExpansionInfo {
/// to \p MainLoopStep.
/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
-static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
- Value *Len, unsigned MainLoopStep,
- unsigned ResidualLoopStep,
- StringRef BBNamePrefix) {
+static LoopExpansionInfo
+insertLoopExpansion(Instruction *InsertBefore, Value *Len,
+ unsigned MainLoopStep, unsigned ResidualLoopStep,
+ StringRef BBNamePrefix,
+ std::optional<uint64_t> AverageTripCount) {
assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
"ResidualLoopStep must divide MainLoopStep if specified");
assert(ResidualLoopStep <= MainLoopStep &&
@@ -175,9 +206,18 @@ static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
if (MustTakeMainLoop)
PreLoopBuilder.CreateBr(MainLoopBB);
- else
- PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
- MainLoopBB, ResidualCondBB);
+ else {
+ auto *BR = PreLoopBuilder.CreateCondBr(
+ PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
+ ResidualCondBB);
+ if (AverageTripCount.has_value()) {
+ MDBuilder MDB(ParentFunc->getContext());
+ setFittedBranchWeights(
+ *BR, {AverageTripCount.value() % MainLoopStep, 1}, false);
+ } else {
+ setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+ }
+ }
PreLoopBB->getTerminator()->eraseFromParent();
// Stay in the MainLoop until we have handled all the LoopUnits. Then go to
@@ -222,22 +262,32 @@ static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
PreLoopBuilder.CreateBr(MainLoopBB);
} else {
ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
+ MDBuilder B(ParentFunc->getContext());
PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
- MainLoopBB, PostLoopBB);
+ MainLoopBB, PostLoopBB,
+ B.createLikelyBranchWeights());
}
PreLoopBB->getTerminator()->eraseFromParent();
// Stay in the MainLoop until we have handled all the LoopUnits.
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
- MainLoopBB, PostLoopBB);
+ auto *Br = LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
+ if (AverageTripCount.has_value())
+ setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
+ /*IsExpected=*/false);
+ else
+ setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
}
return LEI;
}
-void llvm::createMemCpyLoopKnownSize(
- Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
- ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
- bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
- std::optional<uint32_t> AtomicElementSize) {
+void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+ Value *DstAddr, ConstantInt *CopyLen,
+ Align SrcAlign, Align DstAlign,
+ bool SrcIsVolatile, bool DstIsVolatile,
+ bool CanOverlap,
+ const TargetTransformInfo &TTI,
+ std::optional<uint32_t> AtomicElementSize,
+ std::optional<uint64_t> AverageTripCount) {
// No need to expand zero length copies.
if (CopyLen->isZero())
return;
@@ -269,8 +319,9 @@ void llvm::createMemCpyLoopKnownSize(
// Skip the loop expansion entirely if the loop would never be taken.
if (LoopEndCount != 0) {
- LoopExpansionInfo LEI = insertLoopExpansion(InsertBefore, CopyLen,
- LoopOpSize, 0, "static-memcpy");
+ LoopExpansionInfo LEI =
+ insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
+ "static-memcpy", AverageTripCount);
// Fill MainLoopBB
IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -357,7 +408,8 @@ void llvm::createMemCpyLoopUnknownSize(
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
bool CanOverlap, const TargetTransformInfo &TTI,
- std::optional<uint32_t> AtomicElementSize) {
+ std::optional<uint32_t> AtomicElementSize,
+ std::optional<uint64_t> AverageTripCount) {
BasicBlock *PreLoopBB = InsertBefore->getParent();
Function *ParentFunc = PreLoopBB->getParent();
const DataLayout &DL = ParentFunc->getDataLayout();
@@ -387,8 +439,9 @@ void llvm::createMemCpyLoopUnknownSize(
assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
"Store size is expected to match type size");
- LoopExpansionInfo LEI = insertLoopExpansion(
- InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize, "dynamic-memcpy");
+ LoopExpansionInfo LEI =
+ insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
+ "dynamic-memcpy", AverageTripCount);
// Fill MainLoopBB
IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -931,6 +984,7 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
Value *CopyLen, Value *SetValue, Align DstAlign,
+ std::optional<uint64_t> AverageTripCount,
bool IsVolatile) {
Type *TypeOfCopyLen = CopyLen->getType();
BasicBlock *OrigBB = InsertBefore->getParent();
@@ -943,9 +997,16 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
IRBuilder<> Builder(OrigBB->getTerminator());
- Builder.CreateCondBr(
+ auto *ToLoopBR = Builder.CreateCondBr(
Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
LoopBB);
+ MDBuilder MDB(F->getContext());
+ if (AverageTripCount.has_value())
+ ToLoopBR->setMetadata(LLVMContext::MD_prof,
+ MDB.createLikelyBranchWeights());
+ else
+ setExplicitlyUnknownBranchWeightsIfProfiled(*ToLoopBR, DEBUG_TYPE);
+
OrigBB->getTerminator()->eraseFromParent();
unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
@@ -964,8 +1025,13 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
LoopIndex->addIncoming(NewIndex, LoopBB);
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
+ auto *LoopBR = LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
+ if (AverageTripCount.has_value())
+ setFittedBranchWeights(*LoopBR, {AverageTripCount.value(), 1},
+ /*IsExpected=*/false);
+ else
+ setExplicitlyUnknownBranchWeightsIfProfiled(*LoopBR, DEBUG_TYPE);
}
template <typename T>
@@ -983,6 +1049,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
const TargetTransformInfo &TTI,
ScalarEvolution *SE) {
bool CanOverlap = canOverlap(Memcpy, SE);
+ auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
createMemCpyLoopKnownSize(
/* InsertBefore */ Memcpy,
@@ -994,7 +1061,9 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
/* CanOverlap */ CanOverlap,
- /* TargetTransformInfo */ TTI);
+ /* TargetTransformInfo */ TTI,
+ /* AtomicElementSize */ std::nullopt,
+ /* AverageTripCount */ TripCount);
} else {
createMemCpyLoopUnknownSize(
/* InsertBefore */ Memcpy,
@@ -1006,7 +1075,9 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
/* CanOverlap */ CanOverlap,
- /* TargetTransformInfo */ TTI);
+ /* TargetTransformInfo */ TTI,
+ /* AtomicElementSize */ std::nullopt,
+ /* AverageTripCount */ TripCount);
}
}
@@ -1027,16 +1098,17 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
// We may not be able to emit a pointer comparison, but we don't have
// to. Expand as memcpy.
+ auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
- createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
- CI, SrcAlign, DstAlign, SrcIsVolatile,
- DstIsVolatile,
- /*CanOverlap=*/false, TTI);
+ createMemCpyLoopKnownSize(
+ /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
+ SrcIsVolatile, DstIsVolatile,
+ /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
} else {
- createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
- CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
- DstIsVolatile,
- /*CanOverlap=*/false, TTI);
+ createMemCpyLoopUnknownSize(
+ /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
+ DstAlign, SrcIsVolatile, DstIsVolatile,
+ /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
}
return true;
@@ -1072,7 +1144,8 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
/* CopyLen */ Memset->getLength(),
/* SetValue */ Memset->getValue(),
/* Alignment */ Memset->getDestAlign().valueOrOne(),
- Memset->isVolatile());
+ /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
+ /* IsVolatile */ Memset->isVolatile());
}
void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
@@ -1081,7 +1154,8 @@ void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
/* CopyLen=*/Memset->getLength(),
/* SetValue=*/Memset->getValue(),
/* Alignment=*/Memset->getDestAlign().valueOrOne(),
- Memset->isVolatile());
+ /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
+ /* IsVolatile */ Memset->isVolatile());
}
void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
@@ -1100,7 +1174,7 @@ void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
/* DstIsVolatile */ AtomicMemcpy->isVolatile(),
/* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
/* TargetTransformInfo */ TTI,
- /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
} else {
createMemCpyLoopUnknownSize(
/* InsertBefore */ AtomicMemcpy,
@@ -1113,6 +1187,6 @@ void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
/* DstIsVolatile */ AtomicMemcpy->isVolatile(),
/* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
/* TargetTransformInfo */ TTI,
- /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+ /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
}
}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
index a4e049941030e..0ca0bb2421c8b 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
@@ -1,49 +1,62 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
; Constant length memcpy.inline should be left unmodified.
-define void @memcpy_32(ptr %dst, ptr %src) nounwind {
+define void @memcpy_32(ptr %dst, ptr %src) nounwind !prof !0 {
; CHECK-LABEL: define void @memcpy_32(
-; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false)
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false), !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: tail call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 true)
; CHECK-NEXT: ret void
;
- call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0)
+ call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0), !prof !1
tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 1)
ret void
}
-define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind {
+define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind !prof !0 {
; CHECK-LABEL: define void @memcpy_x(
-; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] !prof [[PROF0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_MEMCPY_EXPANSION:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION:.*]]
-; CHECK: [[LOOP_MEMCPY_EXPANSION]]:
-; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOOP_MEMCPY_EXPANSION]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK: [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]]:
+; CHECK-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX]]
; CHECK-NEXT: store i8 [[TMP3]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5]] = add i64 [[LOOP_INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[LOOP_MEMCPY_EXPANSION]], label %[[POST_LOOP_MEMCPY_EXPANSION]]
-; CHECK: [[POST_LOOP_MEMCPY_EXPANSION]]:
+; CHECK-NEXT: br i1 [[TMP6]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION]], !prof [[PROF3:![0-9]+]]
+; CHECK: [[DYNAMIC_MEMCPY_POST_EXPANSION]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label %[[LOOP_MEMCPY_EXPANSION2:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION1:.*]]
-; CHECK: [[LOOP_MEMCPY_EXPANSION2]]:
-; CHECK-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP11:%.*]], %[[LOOP_MEMCPY_EXPANSION2]] ]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1:.*]], !prof [[PROF2]]
+; CHECK: [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]]:
+; CHECK-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[DYNAMIC_MEMCPY_POST_EXPANSION]] ], [ [[TMP11:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX3]]
; CHECK-NEXT: [[TMP9:%.*]] = load volatile i8, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX3]]
; CHECK-NEXT: store volatile i8 [[TMP9]], ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11]] = add i64 [[LOOP_INDEX3]], 1
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[TMP11]], [[X]]
-; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_MEMCPY_EXPANSION2]], label %[[POST_LOOP_MEMCPY_EXPANSION1]]
-; CHECK: [[POST_LOOP_MEMCPY_EXPANSION1]]:
+; CHECK-NEXT: br i1 [[TMP12]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1]], !prof [[PROF4:![0-9]+]]
+; CHECK: [[DYNAMIC_MEMCPY_POST_EXPANSION1]]:
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 0)
- tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1)
+ tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1), !prof !1
ret void
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 3, i32 1}
+;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
index 0843b1532f843..b376e27fdaf1c 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
; Constant length memset.inline should be left unmodified.
@@ -14,32 +14,44 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
ret void
}
-define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind {
+define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind !prof !0 {
; CHECK-LABEL: define void @memset_x(
-; CHECK-SAME: ptr [[A:%.*]], i8 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[A:%.*]], i8 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i8 [[VALUE]], ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
-; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]], !prof [[PROF2:![0-9]+]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]], !prof [[PROF3:![0-9]+]]
; CHECK: [[LOADSTORELOOP2]]:
; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: store volatile i8 [[VALUE]], ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP7]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]], !prof [[PROF3]]
; CHECK: [[SPLIT1]]:
; CHECK-NEXT: ret void
;
- call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 0)
+ call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 0), !prof !1
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 1)
ret void
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
+;.
More information about the llvm-branch-commits
mailing list