[llvm] [profcheck] Fix missing profile metadata in ExpandMemCmp (PR #169979)

Jin Huang via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 14:55:29 PST 2025


https://github.com/jinhuang1102 updated https://github.com/llvm/llvm-project/pull/169979

>From 48e254dd4af0aa83d6fc3e66840aebfa34739fbd Mon Sep 17 00:00:00 2001
From: Jin Huang <jingold at google.com>
Date: Sat, 29 Nov 2025 09:09:29 +0000
Subject: [PATCH] [profcheck] Fix missing profile metadata in ExpandMemCmp

---
 llvm/lib/CodeGen/ExpandMemCmp.cpp             |  7 +++++
 .../Transforms/ExpandMemCmp/AArch64/memcmp.ll | 20 +++++++++-----
 .../Transforms/ExpandMemCmp/X86/memcmp-x32.ll | 15 ++++++++---
 .../Transforms/ExpandMemCmp/X86/memcmp.ll     | 26 ++++++++++++++-----
 llvm/utils/profcheck-xfail.txt                |  3 ---
 5 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 74f93e1979532..1cccd2f66395c 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -488,6 +489,8 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
   // continue to next LoadCmpBlock or EndBlock.
   BasicBlock *BB = Builder.GetInsertBlock();
   BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
+                                              CI->getFunction());
   Builder.Insert(CmpBr);
   if (DTU)
     DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
@@ -552,6 +555,8 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
   // to next LoadCmpBlock or EndBlock.
   BasicBlock *BB = Builder.GetInsertBlock();
   BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
+  setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
+                                              CI->getFunction());
   Builder.Insert(CmpBr);
   if (DTU)
     DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
@@ -592,6 +597,8 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
   Value *Res =
       Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
                            ConstantInt::get(Builder.getInt32Ty(), 1));
+  setExplicitlyUnknownBranchWeightsIfProfiled(*cast<Instruction>(Res),
+                                              DEBUG_TYPE, CI->getFunction());
 
   PhiRes->addIncoming(Res, ResBlock.BB);
   BranchInst *NewBr = BranchInst::Create(EndBlock);
diff --git a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
index 7df48d878bd0b..0ed3af535ac9c 100644
--- a/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/AArch64/memcmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3
 ; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
 ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
 
@@ -98,15 +98,15 @@ define i32 @cmp6(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %call
 }
 
-define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; CHECK-LABEL: define i32 @cmp7(
-; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) {
+; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; CHECK-NEXT:    br label [[LOADBB:%.*]]
 ; CHECK:       res_block:
 ; CHECK-NEXT:    [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP12:%.*]], [[LOADBB1:%.*]] ]
 ; CHECK-NEXT:    [[PHI_SRC2:%.*]] = phi i32 [ [[TMP6:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT:    br label [[ENDBLOCK:%.*]]
 ; CHECK:       loadbb:
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[X]], align 1
@@ -114,7 +114,7 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; CHECK-NEXT:    [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
 ; CHECK-NEXT:    [[TMP6]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOADBB1]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
 ; CHECK:       loadbb1:
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[X]], i64 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[Y]], i64 3
@@ -123,7 +123,7 @@ define i32 @cmp7(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; CHECK-NEXT:    [[TMP12]] = call i32 @llvm.bswap.i32(i32 [[TMP10]])
 ; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP11]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[ENDBLOCK]], label [[RES_BLOCK]], !prof [[PROF1]]
 ; CHECK:       endblock:
 ; CHECK-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[PHI_RES]]
@@ -860,3 +860,11 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
index 0507ec9de542e..30da8b391f55d 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp-x32.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -expand-memcmp -mtriple=i686-unknown-unknown   -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
 ; RUN: opt -S -passes=expand-memcmp -mtriple=i686-unknown-unknown   -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=X32
 
@@ -34,12 +34,12 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl
   ret i32 %call
 }
 
-define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; X32-LABEL: @cmp3(
 ; X32-NEXT:    br label [[LOADBB:%.*]]
 ; X32:       res_block:
 ; X32-NEXT:    [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
-; X32-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X32-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
 ; X32-NEXT:    br label [[ENDBLOCK:%.*]]
 ; X32:       loadbb:
 ; X32-NEXT:    [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
@@ -47,7 +47,7 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; X32-NEXT:    [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
 ; X32-NEXT:    [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
 ; X32-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X32-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
 ; X32:       loadbb1:
 ; X32-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
 ; X32-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
@@ -564,3 +564,10 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %conv
 }
 
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; X32: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; X32: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; X32: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
index 86dc3e5245f24..b5075a2fcff0b 100644
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
 ; RUN: opt -S -expand-memcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_2LD
 ; RUN: opt -S -passes=expand-memcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128         < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64_1LD
@@ -36,12 +36,12 @@ define i32 @cmp2_align2(ptr nocapture readonly align 2 %x, ptr nocapture readonl
   ret i32 %call
 }
 
-define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; X64-LABEL: @cmp3(
 ; X64-NEXT:    br label [[LOADBB:%.*]]
 ; X64:       res_block:
 ; X64-NEXT:    [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
-; X64-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
+; X64-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1, !prof [[PROF1:![0-9]+]]
 ; X64-NEXT:    br label [[ENDBLOCK:%.*]]
 ; X64:       loadbb:
 ; X64-NEXT:    [[TMP5:%.*]] = load i16, ptr [[X:%.*]], align 1
@@ -49,7 +49,7 @@ define i32 @cmp3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; X64-NEXT:    [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
 ; X64-NEXT:    [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
 ; X64-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
-; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64-NEXT:    br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]], !prof [[PROF1]]
 ; X64:       loadbb1:
 ; X64-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[X]], i64 2
 ; X64-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[Y]], i64 2
@@ -474,7 +474,7 @@ define i32 @cmp_eq2(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %conv
 }
 
-define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
+define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y) !prof !0 {
 ; X64_1LD-LABEL: @cmp_eq3(
 ; X64_1LD-NEXT:    br label [[LOADBB:%.*]]
 ; X64_1LD:       res_block:
@@ -483,14 +483,14 @@ define i32 @cmp_eq3(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i16, ptr [[X:%.*]], align 1
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i16, ptr [[Y:%.*]], align 1
 ; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
-; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]], !prof [[PROF1]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[X]], i64 2
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[Y]], i64 2
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, ptr [[TMP6]], align 1
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
-; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]], !prof [[PROF1]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
 ; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
@@ -1076,3 +1076,15 @@ define i32 @cmp_eq16(ptr nocapture readonly %x, ptr nocapture readonly %y)  {
   ret i32 %conv
 }
 
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; X64_1LD: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; X64_2LD: attributes #[[ATTR0:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; X64_1LD: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; X64_1LD: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
+; X64_2LD: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; X64_2LD: [[PROF1]] = !{!"unknown", !"expand-memcmp"}
+;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 835025d1e319e..adac50ce08702 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -145,9 +145,6 @@ Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll
 Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll
 Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll
 Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll
-Transforms/ExpandMemCmp/AArch64/memcmp.ll
-Transforms/ExpandMemCmp/X86/memcmp.ll
-Transforms/ExpandMemCmp/X86/memcmp-x32.ll
 Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll
 Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
 Transforms/ExpandVariadics/intrinsics.ll



More information about the llvm-commits mailing list