[llvm-branch-commits] [llvm] [ProfCheck][Matrix] Add profile data where relevant (PR #181292)
Aiden Grossman via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 12 19:17:46 PST 2026
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/181292
From b99ff35e3648564d0470a80e36321bf7d313580f Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 02:03:03 +0000
Subject: [PATCH 1/2] tests
Created using spr 1.3.7
---
.../LowerMatrixIntrinsics/multiply-fused.ll | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
index 430358f0a5138..c3a81b79f6de5 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=9999 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s
; REQUIRES: aarch64-registered-target
@@ -8,18 +8,18 @@ target triple = "aarch64-apple-ios"
; Test tiling without generating explicit loops.
-define void @multiply(ptr %A, ptr %B, ptr %C) {
+define void @multiply(ptr %A, ptr %B, ptr %C) !prof !0 {
; CHECK-LABEL: @multiply(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint ptr [[C:%.*]] to i64
; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 128
; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]]
-; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: alias_cont:
; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 128
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]]
-; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]], !prof [[PROF1]]
; CHECK: copy:
; CHECK-NEXT: [[TMP2:%.*]] = alloca [16 x double], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP2]], ptr noundef nonnull align 8 dereferenceable(128) [[A]], i64 128, i1 false)
@@ -30,11 +30,11 @@ define void @multiply(ptr %A, ptr %B, ptr %C) {
; CHECK-NEXT: [[STORE_END5:%.*]] = add nuw nsw i64 [[STORE_BEGIN4]], 128
; CHECK-NEXT: [[LOAD_BEGIN6:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[STORE_END5]], [[LOAD_BEGIN6]]
-; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]], !prof [[PROF1]]
; CHECK: alias_cont1:
; CHECK-NEXT: [[LOAD_END7:%.*]] = add nuw nsw i64 [[LOAD_BEGIN6]], 128
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[LOAD_END7]], [[STORE_BEGIN4]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]], !prof [[PROF1]]
; CHECK: copy2:
; CHECK-NEXT: [[TMP6:%.*]] = alloca [16 x double], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP6]], ptr noundef nonnull align 8 dereferenceable(128) [[B]], i64 128, i1 false)
@@ -375,3 +375,8 @@ entry:
}
declare <16 x double> @llvm.matrix.multiply(<16 x double>, <16 x double>, i32, i32, i32)
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[PROF1]] = !{!"unknown", !"lower-matrix-intrinsics"}
+;.
From 61e144b9f0add7f71edc60f7d6889516fd2b4150 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 03:17:34 +0000
Subject: [PATCH 2/2] fix test, feedback
Created using spr 1.3.7
---
llvm/lib/Transforms/Utils/MatrixUtils.cpp | 3 +++
.../multiply-fused-loops.ll | 20 +++++++++++--------
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index e84522276219c..262e4c99365d7 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -51,6 +51,9 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
if (!ProfcheckDisableMetadataFixes) {
+ assert(Step->getZExtValue() != 0 &&
+ "Expected a non-zero step size. A step size of zero produces an "
+ "infinite loop which massively skews profile data.");
MDBuilder MDB(Preheader->getContext());
setFittedBranchWeights(
*BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
index 8c6d142550abd..498102f70c42b 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=0 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s
; REQUIRES: aarch64-registered-target
@@ -360,10 +360,14 @@ entry:
}
declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32)
-
-; CHECK: !0 = distinct !{!0, !1}
-; CHECK-NEXT: !1 = !{!"llvm.loop.unroll.count", i32 2}
-; CHECK-NEXT: !2 = distinct !{!2, !1}
-; CHECK-NEXT: !3 = distinct !{!3, !4}
-; CHECK-NEXT: !4 = !{!"llvm.loop.unroll.count", i32 1}
-; CHECK-NEXT: !5 = distinct !{!5, !4}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 2}
+; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.count", i32 2}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]]}
+; CHECK: [[META6]] = !{!"llvm.loop.unroll.count", i32 1}
+; CHECK: [[PROF7]] = !{!"branch_weights", i32 1, i32 4}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]]}
+;.
More information about the llvm-branch-commits mailing list