[llvm-branch-commits] [llvm] Memset (PR #174490)

Mircea Trofin via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Jan 6 10:45:46 PST 2026


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/174490

>From 3328edeec35c1bd7f458e7c5aec4cfe9526cf495 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Mon, 15 Dec 2025 17:25:57 -0800
Subject: [PATCH] Memset

---
 .../Transforms/Utils/LowerMemIntrinsics.h     |   6 +-
 .../Transforms/Utils/LowerMemIntrinsics.cpp   | 148 +++++++++++++-----
 .../X86/memcpy-inline-non-constant-len.ll     |  49 +++---
 .../X86/memset-inline-non-constant-len.ll     |  28 +++-
 4 files changed, 166 insertions(+), 65 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index d4e72a60fc1ea..1ec150330484d 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -38,7 +38,8 @@ LLVM_ABI void createMemCpyLoopUnknownSize(
     Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
     Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile,
     bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<unsigned> AtomicSize = std::nullopt);
+    std::optional<unsigned> AtomicSize = std::nullopt,
+    std::optional<uint64_t> AverageTripCount = std::nullopt);
 
 /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
 /// compile time constant. Loop is inserted at \p InsertBefore.
@@ -46,7 +47,8 @@ LLVM_ABI void createMemCpyLoopKnownSize(
     Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
     ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile,
     bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicCpySize = std::nullopt);
+    std::optional<uint32_t> AtomicCpySize = std::nullopt,
+    std::optional<uint64_t> AverageTripCount = std::nullopt);
 
 /// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
 LLVM_ABI void expandMemCpyAsLoop(MemCpyInst *MemCpy,
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 4ab99edd64baa..c6f9e8c32b90d 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -12,15 +12,23 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include <limits>
 #include <optional>
 
 #define DEBUG_TYPE "lower-mem-intrinsics"
 
 using namespace llvm;
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+}
+
 /// \returns \p Len urem \p OpSize, checking for optimization opportunities.
 /// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
 static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
@@ -60,6 +68,28 @@ struct LoopExpansionInfo {
   /// required.
   Value *ResidualLoopIndex = nullptr;
 };
+
+std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
+  if (ProfcheckDisableMetadataFixes)
+    return std::nullopt;
+  if (std::optional<Function::ProfileCount> EC =
+          I.getFunction()->getEntryCount();
+      !EC || !EC->getCount())
+    return std::nullopt;
+  if (const auto Len = I.getLengthInBytes())
+    return Len->getZExtValue();
+  uint64_t Total = 0;
+  auto ProfData =
+      getValueProfDataFromInst(I, InstrProfValueKind::IPVK_MemOPSize,
+                               std::numeric_limits<uint32_t>::max(), Total);
+  if (!Total)
+    return std::nullopt;
+  uint64_t TripCount = 0;
+  for (const auto &KV : ProfData)
+    TripCount += KV.Count * KV.Value;
+  return std::round(1.0 * TripCount / Total);
+}
+
 } // namespace
 
 /// Insert the control flow and loop counters for a memcpy/memset loop
@@ -94,10 +124,11 @@ struct LoopExpansionInfo {
 /// to \p MainLoopStep.
 /// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
 /// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
-static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
-                                             Value *Len, unsigned MainLoopStep,
-                                             unsigned ResidualLoopStep,
-                                             StringRef BBNamePrefix) {
+static LoopExpansionInfo
+insertLoopExpansion(Instruction *InsertBefore, Value *Len,
+                    unsigned MainLoopStep, unsigned ResidualLoopStep,
+                    StringRef BBNamePrefix,
+                    std::optional<uint64_t> AverageTripCount) {
   assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
          "ResidualLoopStep must divide MainLoopStep if specified");
   assert(ResidualLoopStep <= MainLoopStep &&
@@ -175,9 +206,18 @@ static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
     ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
     if (MustTakeMainLoop)
       PreLoopBuilder.CreateBr(MainLoopBB);
-    else
-      PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
-                                  MainLoopBB, ResidualCondBB);
+    else {
+      auto *BR = PreLoopBuilder.CreateCondBr(
+          PreLoopBuilder.CreateICmpNE(LoopUnits, Zero), MainLoopBB,
+          ResidualCondBB);
+      if (AverageTripCount.has_value()) {
+        MDBuilder MDB(ParentFunc->getContext());
+        setFittedBranchWeights(
+            *BR, {AverageTripCount.value() % MainLoopStep, 1}, false);
+      } else {
+        setExplicitlyUnknownBranchWeightsIfProfiled(*BR, DEBUG_TYPE);
+      }
+    }
     PreLoopBB->getTerminator()->eraseFromParent();
 
     // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
@@ -222,22 +262,32 @@ static LoopExpansionInfo insertLoopExpansion(Instruction *InsertBefore,
       PreLoopBuilder.CreateBr(MainLoopBB);
     } else {
       ConstantInt *Zero = ConstantInt::get(ILenType, 0U);
+      MDBuilder B(ParentFunc->getContext());
       PreLoopBuilder.CreateCondBr(PreLoopBuilder.CreateICmpNE(LoopUnits, Zero),
-                                  MainLoopBB, PostLoopBB);
+                                  MainLoopBB, PostLoopBB,
+                                  B.createLikelyBranchWeights());
     }
     PreLoopBB->getTerminator()->eraseFromParent();
     // Stay in the MainLoop until we have handled all the LoopUnits.
-    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopUnits),
-                             MainLoopBB, PostLoopBB);
+    auto *Br = LoopBuilder.CreateCondBr(
+        LoopBuilder.CreateICmpULT(NewIndex, LoopUnits), MainLoopBB, PostLoopBB);
+    if (AverageTripCount.has_value())
+      setFittedBranchWeights(*Br, {AverageTripCount.value() / MainLoopStep, 1},
+                             /*IsExpected=*/false);
+    else
+      setExplicitlyUnknownBranchWeightsIfProfiled(*Br, DEBUG_TYPE);
   }
   return LEI;
 }
 
-void llvm::createMemCpyLoopKnownSize(
-    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
-    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
-    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicElementSize) {
+void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+                                     Value *DstAddr, ConstantInt *CopyLen,
+                                     Align SrcAlign, Align DstAlign,
+                                     bool SrcIsVolatile, bool DstIsVolatile,
+                                     bool CanOverlap,
+                                     const TargetTransformInfo &TTI,
+                                     std::optional<uint32_t> AtomicElementSize,
+                                     std::optional<uint64_t> AverageTripCount) {
   // No need to expand zero length copies.
   if (CopyLen->isZero())
     return;
@@ -269,8 +319,9 @@ void llvm::createMemCpyLoopKnownSize(
 
   // Skip the loop expansion entirely if the loop would never be taken.
   if (LoopEndCount != 0) {
-    LoopExpansionInfo LEI = insertLoopExpansion(InsertBefore, CopyLen,
-                                                LoopOpSize, 0, "static-memcpy");
+    LoopExpansionInfo LEI =
+        insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, 0,
+                            "static-memcpy", AverageTripCount);
 
     // Fill MainLoopBB
     IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -357,7 +408,8 @@ void llvm::createMemCpyLoopUnknownSize(
     Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
     Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
     bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicElementSize) {
+    std::optional<uint32_t> AtomicElementSize,
+    std::optional<uint64_t> AverageTripCount) {
   BasicBlock *PreLoopBB = InsertBefore->getParent();
   Function *ParentFunc = PreLoopBB->getParent();
   const DataLayout &DL = ParentFunc->getDataLayout();
@@ -387,8 +439,9 @@ void llvm::createMemCpyLoopUnknownSize(
   assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
          "Store size is expected to match type size");
 
-  LoopExpansionInfo LEI = insertLoopExpansion(
-      InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize, "dynamic-memcpy");
+  LoopExpansionInfo LEI =
+      insertLoopExpansion(InsertBefore, CopyLen, LoopOpSize, ResidualLoopOpSize,
+                          "dynamic-memcpy", AverageTripCount);
 
   // Fill MainLoopBB
   IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
@@ -931,6 +984,7 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
 
 static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                              Value *CopyLen, Value *SetValue, Align DstAlign,
+                             std::optional<uint64_t> AverageTripCount,
                              bool IsVolatile) {
   Type *TypeOfCopyLen = CopyLen->getType();
   BasicBlock *OrigBB = InsertBefore->getParent();
@@ -943,9 +997,16 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
 
   IRBuilder<> Builder(OrigBB->getTerminator());
 
-  Builder.CreateCondBr(
+  auto *ToLoopBR = Builder.CreateCondBr(
       Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
       LoopBB);
+  MDBuilder MDB(F->getContext());
+  if (AverageTripCount.has_value())
+    ToLoopBR->setMetadata(LLVMContext::MD_prof,
+                          MDB.createLikelyBranchWeights());
+  else
+    setExplicitlyUnknownBranchWeightsIfProfiled(*ToLoopBR, DEBUG_TYPE);
+
   OrigBB->getTerminator()->eraseFromParent();
 
   unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
@@ -964,8 +1025,13 @@ static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
       LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
   LoopIndex->addIncoming(NewIndex, LoopBB);
 
-  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
-                           NewBB);
+  auto *LoopBR = LoopBuilder.CreateCondBr(
+      LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, NewBB);
+  if (AverageTripCount.has_value())
+    setFittedBranchWeights(*LoopBR, {AverageTripCount.value(), 1},
+                           /*IsExpected=*/false);
+  else
+    setExplicitlyUnknownBranchWeightsIfProfiled(*LoopBR, DEBUG_TYPE);
 }
 
 template <typename T>
@@ -983,6 +1049,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                               const TargetTransformInfo &TTI,
                               ScalarEvolution *SE) {
   bool CanOverlap = canOverlap(Memcpy, SE);
+  auto TripCount = getAverageMemOpLoopTripCount(*Memcpy);
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
     createMemCpyLoopKnownSize(
         /* InsertBefore */ Memcpy,
@@ -994,7 +1061,9 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
         /* CanOverlap */ CanOverlap,
-        /* TargetTransformInfo */ TTI);
+        /* TargetTransformInfo */ TTI,
+        /* AtomicElementSize */ std::nullopt,
+        /* AverageTripCount */ TripCount);
   } else {
     createMemCpyLoopUnknownSize(
         /* InsertBefore */ Memcpy,
@@ -1006,7 +1075,9 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
         /* SrcIsVolatile */ Memcpy->isVolatile(),
         /* DstIsVolatile */ Memcpy->isVolatile(),
         /* CanOverlap */ CanOverlap,
-        /* TargetTransformInfo */ TTI);
+        /* TargetTransformInfo */ TTI,
+        /* AtomicElementSize */ std::nullopt,
+        /* AverageTripCount */ TripCount);
   }
 }
 
@@ -1027,16 +1098,17 @@ bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
     if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
       // We may not be able to emit a pointer comparison, but we don't have
       // to. Expand as memcpy.
+      auto AverageTripCount = getAverageMemOpLoopTripCount(*Memmove);
       if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
-        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
-                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
-                                  DstIsVolatile,
-                                  /*CanOverlap=*/false, TTI);
+        createMemCpyLoopKnownSize(
+            /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
+            SrcIsVolatile, DstIsVolatile,
+            /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
       } else {
-        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
-                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
-                                    DstIsVolatile,
-                                    /*CanOverlap=*/false, TTI);
+        createMemCpyLoopUnknownSize(
+            /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
+            DstAlign, SrcIsVolatile, DstIsVolatile,
+            /*CanOverlap=*/false, TTI, std::nullopt, AverageTripCount);
       }
 
       return true;
@@ -1072,7 +1144,8 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
                    /* CopyLen */ Memset->getLength(),
                    /* SetValue */ Memset->getValue(),
                    /* Alignment */ Memset->getDestAlign().valueOrOne(),
-                   Memset->isVolatile());
+                   /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
+                   /* IsVolatile */ Memset->isVolatile());
 }
 
 void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
@@ -1081,7 +1154,8 @@ void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
                    /* CopyLen=*/Memset->getLength(),
                    /* SetValue=*/Memset->getValue(),
                    /* Alignment=*/Memset->getDestAlign().valueOrOne(),
-                   Memset->isVolatile());
+                   /* AverageTripCount */ getAverageMemOpLoopTripCount(*Memset),
+                   /* IsVolatile */ Memset->isVolatile());
 }
 
 void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
@@ -1100,7 +1174,7 @@ void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
         /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
         /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
         /* TargetTransformInfo */ TTI,
-        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+        /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
   } else {
     createMemCpyLoopUnknownSize(
         /* InsertBefore */ AtomicMemcpy,
@@ -1113,6 +1187,6 @@ void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
         /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
         /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
         /* TargetTransformInfo */ TTI,
-        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
+        /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
   }
 }
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
index a4e049941030e..0ca0bb2421c8b 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memcpy-inline-non-constant-len.ll
@@ -1,49 +1,62 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
 
 ; Constant length memcpy.inline should be left unmodified.
-define void @memcpy_32(ptr %dst, ptr %src) nounwind {
+define void @memcpy_32(ptr %dst, ptr %src) nounwind !prof !0 {
 ; CHECK-LABEL: define void @memcpy_32(
-; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false)
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:    call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 false), !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    tail call void @llvm.memcpy.inline.p0.p0.i64(ptr [[DST]], ptr [[SRC]], i64 32, i1 true)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0)
+  call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 0), !prof !1
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 1)
   ret void
 }
 
-define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind {
+define void @memcpy_x(ptr %dst, ptr %src, i64 %x) nounwind  !prof !0 {
 ; CHECK-LABEL: define void @memcpy_x(
-; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[X:%.*]]) #[[ATTR0]] !prof [[PROF0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_MEMCPY_EXPANSION:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION:.*]]
-; CHECK:       [[LOOP_MEMCPY_EXPANSION]]:
-; CHECK-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOOP_MEMCPY_EXPANSION]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION:.*]], !prof [[PROF2:![0-9]+]]
+; CHECK:       [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]]:
+; CHECK-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX]]
 ; CHECK-NEXT:    store i8 [[TMP3]], ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[TMP5]] = add i64 [[LOOP_INDEX]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[LOOP_MEMCPY_EXPANSION]], label %[[POST_LOOP_MEMCPY_EXPANSION]]
-; CHECK:       [[POST_LOOP_MEMCPY_EXPANSION]]:
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION]], !prof [[PROF3:![0-9]+]]
+; CHECK:       [[DYNAMIC_MEMCPY_POST_EXPANSION]]:
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[X]], 0
-; CHECK-NEXT:    br i1 [[TMP7]], label %[[LOOP_MEMCPY_EXPANSION2:.*]], label %[[POST_LOOP_MEMCPY_EXPANSION1:.*]]
-; CHECK:       [[LOOP_MEMCPY_EXPANSION2]]:
-; CHECK-NEXT:    [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP11:%.*]], %[[LOOP_MEMCPY_EXPANSION2]] ]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2:.*]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1:.*]], !prof [[PROF2]]
+; CHECK:       [[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]]:
+; CHECK-NEXT:    [[LOOP_INDEX3:%.*]] = phi i64 [ 0, %[[DYNAMIC_MEMCPY_POST_EXPANSION]] ], [ [[TMP11:%.*]], %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[LOOP_INDEX3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = load volatile i8, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[LOOP_INDEX3]]
 ; CHECK-NEXT:    store volatile i8 [[TMP9]], ptr [[TMP10]], align 1
 ; CHECK-NEXT:    [[TMP11]] = add i64 [[LOOP_INDEX3]], 1
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i64 [[TMP11]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP12]], label %[[LOOP_MEMCPY_EXPANSION2]], label %[[POST_LOOP_MEMCPY_EXPANSION1]]
-; CHECK:       [[POST_LOOP_MEMCPY_EXPANSION1]]:
+; CHECK-NEXT:    br i1 [[TMP12]], label %[[DYNAMIC_MEMCPY_EXPANSION_MAIN_BODY2]], label %[[DYNAMIC_MEMCPY_POST_EXPANSION1]], !prof [[PROF4:![0-9]+]]
+; CHECK:       [[DYNAMIC_MEMCPY_POST_EXPANSION1]]:
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 0)
-  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1)
+  tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dst, ptr %src, i64 %x, i1 1), !prof !1
   ret void
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 3, i32 1}
+;.
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
index 0843b1532f843..b376e27fdaf1c 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-inline-non-constant-len.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; RUN: opt -mtriple=x86_64-pc-linux-gnu -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
 
 ; Constant length memset.inline should be left unmodified.
@@ -14,32 +14,44 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
   ret void
 }
 
-define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind {
+define void @memset_x(ptr %a, i8 %value, i64 %x) nounwind !prof !0 {
 ; CHECK-LABEL: define void @memset_x(
-; CHECK-SAME: ptr [[A:%.*]], i8 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[A:%.*]], i8 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] !prof [[PROF0:![0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       [[LOADSTORELOOP]]:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    store i8 [[VALUE]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]], !prof [[PROF2:![0-9]+]]
 ; CHECK:       [[SPLIT]]:
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       [[LOADSTORELOOP2]]:
 ; CHECK-NEXT:    [[TMP7:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP7]]
 ; CHECK-NEXT:    store volatile i8 [[VALUE]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP9]] = add i64 [[TMP7]], 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]], !prof [[PROF3]]
 ; CHECK:       [[SPLIT1]]:
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 0)
+  call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 0), !prof !1
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 %x, i1 1)
   ret void
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"VP", i32 1, i32 100, i32 5, i32 10, i32 16, i32 13}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1048575, i32 1}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 3, i32 1}
+; CHECK: [[PROF3]] = !{!"unknown", !"lower-mem-intrinsics"}
+;.



More information about the llvm-branch-commits mailing list