[llvm] [ProfCheck][Matrix] Add profile data where relevant (PR #181292)

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 17 18:51:07 PST 2026


https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/181292

>From 9906c842f87c5145adc1bea82101f6ab3b55591b Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 01:57:43 +0000
Subject: [PATCH 1/7] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7

[skip ci]
---
 llvm/include/llvm/Transforms/Utils/MatrixUtils.h |  7 ++++---
 .../Transforms/Scalar/LowerMatrixIntrinsics.cpp  |  4 +++-
 llvm/lib/Transforms/Utils/MatrixUtils.cpp        | 10 +++++-----
 .../Transforms/LowerMatrixIntrinsics/select.ll   | 16 +++++++++++-----
 llvm/utils/profcheck-xfail.txt                   |  1 -
 5 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/MatrixUtils.h b/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
index ffad57002935e..9343f71d1243e 100644
--- a/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
@@ -18,6 +18,7 @@
 namespace llvm {
 class DomTreeUpdater;
 class BasicBlock;
+class ConstantInt;
 class Value;
 class Loop;
 class LoopInfo;
@@ -80,9 +81,9 @@ struct TileInfo {
   /// Exit as exit block.  Adds the new loop blocks to \L and applies dominator
   /// tree updates to \p DTU.
   static BasicBlock *CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
-                                Value *Bound, Value *Step, StringRef Name,
-                                IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
-                                LoopInfo &LI);
+                                ConstantInt *Bound, ConstantInt *Step,
+                                StringRef Name, IRBuilderBase &B,
+                                DomTreeUpdater &DTU, Loop *L, LoopInfo &LI);
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index ecf295dc75c3a..053f1d99274ae 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -2459,16 +2459,18 @@ class LowerMatrixIntrinsics {
     MatrixTy B = getMatrix(OpB, Shape, Builder);
 
     SmallVector<Value*> CondV;
+    Instruction *MDFrom = nullptr;
     if (isa<FixedVectorType>(Cond->getType())) {
       MatrixTy C = getMatrix(Cond, Shape, Builder);
       llvm::copy(C.vectors(), std::back_inserter(CondV));
     } else {
       CondV.resize(A.getNumVectors());
       llvm::fill(CondV, Cond);
+      MDFrom = Inst;
     }
 
     for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
-      Result.addVector(Builder.CreateSelect(CV, AV, BV));
+      Result.addVector(Builder.CreateSelect(CV, AV, BV, "", MDFrom));
 
     return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                    Result.getNumVectors());
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index 7866d6434c115..a35003c3bc390 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -21,9 +21,9 @@
 using namespace llvm;
 
 BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
-                                 Value *Bound, Value *Step, StringRef Name,
-                                 IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
-                                 LoopInfo &LI) {
+                                 ConstantInt *Bound, ConstantInt *Step,
+                                 StringRef Name, IRBuilderBase &B,
+                                 DomTreeUpdater &DTU, Loop *L, LoopInfo &LI) {
   LLVMContext &Ctx = Preheader->getContext();
   BasicBlock *Header = BasicBlock::Create(
       Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit);
@@ -35,8 +35,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
   Type *I32Ty = Type::getInt64Ty(Ctx);
   BranchInst::Create(Body, Header);
   BranchInst::Create(Latch, Body);
-  PHINode *IV =
-      PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());
+  PHINode *IV = PHINode::Create(I32Ty, 2, Name + ".iv",
+                                Header->getTerminator()->getIterator());
   IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader);
 
   B.SetInsertPoint(Latch);
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
index bd97915759aac..73ac5f8b78704 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
@@ -1,7 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
 ; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
 
-define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
+define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) !prof !0 {
 ; CHECK-LABEL: @select_2x2_bot(
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
@@ -9,8 +9,8 @@ define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
 ; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16
 ; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2
 ; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]], !prof [[PROF1]]
 ; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 4
 ; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr float, ptr [[OUT]], i64 2
 ; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP5]], align 4
@@ -18,7 +18,7 @@ define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
 ;
   %lhsv = load <4 x float>, ptr %lhs
   %rhsv = load <4 x float>, ptr %rhs
-  %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv
+  %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv, !prof !1
   call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
   ret void
 }
@@ -205,3 +205,9 @@ define void @select_2x2_vcond_shape5(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) {
   store <4 x float> %op, ptr %out
   ret void
 }
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 454f3d8f8fe00..63b1cf9696c65 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -190,7 +190,6 @@ Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
 Transforms/LowerMatrixIntrinsics/multiply-fused-loops-large-matrixes.ll
 Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll
 Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
-Transforms/LowerMatrixIntrinsics/select.ll
 Transforms/LowerSwitch/2003-05-01-PHIProblem.ll
 Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll
 Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll

>From b99ff35e3648564d0470a80e36321bf7d313580f Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 02:03:03 +0000
Subject: [PATCH 2/7] tests

Created using spr 1.3.7
---
 .../LowerMatrixIntrinsics/multiply-fused.ll     | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
index 430358f0a5138..c3a81b79f6de5 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
 ; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=9999 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s
 
 ; REQUIRES: aarch64-registered-target
@@ -8,18 +8,18 @@ target triple = "aarch64-apple-ios"
 
 ; Test tiling without generating explicit loops.
 
-define void @multiply(ptr %A, ptr %B, ptr %C) {
+define void @multiply(ptr %A, ptr %B, ptr %C) !prof !0 {
 ; CHECK-LABEL: @multiply(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[STORE_BEGIN:%.*]] = ptrtoint ptr [[C:%.*]] to i64
 ; CHECK-NEXT:    [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 128
 ; CHECK-NEXT:    [[LOAD_BEGIN:%.*]] = ptrtoint ptr [[A:%.*]] to i64
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]]
-; CHECK-NEXT:    br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       alias_cont:
 ; CHECK-NEXT:    [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 128
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]]
-; CHECK-NEXT:    br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]], !prof [[PROF1]]
 ; CHECK:       copy:
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca [16 x double], align 8
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP2]], ptr noundef nonnull align 8 dereferenceable(128) [[A]], i64 128, i1 false)
@@ -30,11 +30,11 @@ define void @multiply(ptr %A, ptr %B, ptr %C) {
 ; CHECK-NEXT:    [[STORE_END5:%.*]] = add nuw nsw i64 [[STORE_BEGIN4]], 128
 ; CHECK-NEXT:    [[LOAD_BEGIN6:%.*]] = ptrtoint ptr [[B:%.*]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i64 [[STORE_END5]], [[LOAD_BEGIN6]]
-; CHECK-NEXT:    br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]], !prof [[PROF1]]
 ; CHECK:       alias_cont1:
 ; CHECK-NEXT:    [[LOAD_END7:%.*]] = add nuw nsw i64 [[LOAD_BEGIN6]], 128
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i64 [[LOAD_END7]], [[STORE_BEGIN4]]
-; CHECK-NEXT:    br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]], !prof [[PROF1]]
 ; CHECK:       copy2:
 ; CHECK-NEXT:    [[TMP6:%.*]] = alloca [16 x double], align 8
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP6]], ptr noundef nonnull align 8 dereferenceable(128) [[B]], i64 128, i1 false)
@@ -375,3 +375,8 @@ entry:
 }
 
 declare <16 x double> @llvm.matrix.multiply(<16 x double>, <16 x double>, i32, i32, i32)
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[PROF1]] = !{!"unknown", !"lower-matrix-intrinsics"}
+;.

>From f0d3a8f8819d71b8640b1af606470199c85f46ea Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 02:32:01 +0000
Subject: [PATCH 3/7] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7

[skip ci]
---
 llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 7 ++++++-
 llvm/lib/Transforms/Utils/MatrixUtils.cpp            | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 053f1d99274ae..93167c04e3142 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -104,6 +104,10 @@ static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
              "in the inner loop of matmul"),
     cl::init(0));
 
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
 /// Helper function to either return Scope, if it is a subprogram or the
 /// attached subprogram for a local scope.
 static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -2466,7 +2470,8 @@ class LowerMatrixIntrinsics {
     } else {
       CondV.resize(A.getNumVectors());
       llvm::fill(CondV, Cond);
-      MDFrom = Inst;
+      if (!ProfcheckDisableMetadataFixes)
+        MDFrom = Inst;
     }
 
     for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index a35003c3bc390..7bb0cb919f5c9 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -35,8 +35,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
   Type *I32Ty = Type::getInt64Ty(Ctx);
   BranchInst::Create(Body, Header);
   BranchInst::Create(Latch, Body);
-  PHINode *IV = PHINode::Create(I32Ty, 2, Name + ".iv",
-                                Header->getTerminator()->getIterator());
+  PHINode *IV =
+      PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());
   IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader);
 
   B.SetInsertPoint(Latch);

>From 61e144b9f0add7f71edc60f7d6889516fd2b4150 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 03:17:34 +0000
Subject: [PATCH 4/7] fix test, feedback

Created using spr 1.3.7
---
 llvm/lib/Transforms/Utils/MatrixUtils.cpp     |  3 +++
 .../multiply-fused-loops.ll                   | 20 +++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index e84522276219c..262e4c99365d7 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -51,6 +51,9 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
   Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
   auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
   if (!ProfcheckDisableMetadataFixes) {
+    assert(Step->getZExtValue() != 0 &&
+           "Expected a non-zero step size. A step size of zero produces an "
+           "infinite loop which massively skews profile data.");
     MDBuilder MDB(Preheader->getContext());
     setFittedBranchWeights(
         *BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
index 8c6d142550abd..498102f70c42b 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
 ; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=0 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s
 
 ; REQUIRES: aarch64-registered-target
@@ -360,10 +360,14 @@ entry:
 }
 
 declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32)
-
-; CHECK:      !0 = distinct !{!0, !1}
-; CHECK-NEXT: !1 = !{!"llvm.loop.unroll.count", i32 2}
-; CHECK-NEXT: !2 = distinct !{!2, !1}
-; CHECK-NEXT: !3 = distinct !{!3, !4}
-; CHECK-NEXT: !4 = !{!"llvm.loop.unroll.count", i32 1}
-; CHECK-NEXT: !5 = distinct !{!5, !4}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 2}
+; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.count", i32 2}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]]}
+; CHECK: [[META6]] = !{!"llvm.loop.unroll.count", i32 1}
+; CHECK: [[PROF7]] = !{!"branch_weights", i32 1, i32 4}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]]}
+;.

>From 59cf57eac2f79e3ab3d7a4dc809f9778630497c8 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 05:07:38 +0000
Subject: [PATCH 5/7] feedback

Created using spr 1.3.7
---
 llvm/lib/Transforms/Utils/MatrixUtils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index 262e4c99365d7..ab47289d4dad5 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -52,8 +52,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
   auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
   if (!ProfcheckDisableMetadataFixes) {
     assert(Step->getZExtValue() != 0 &&
-           "Expected a non-zero step size. A step size of zero produces an "
-           "infinite loop which massively skews profile data.");
+           "Expected a non-zero step size. Matrices are expected to always be "
+           "finite sized.");
     MDBuilder MDB(Preheader->getContext());
     setFittedBranchWeights(
         *BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);

>From e999b50df814cf775a2eb1097dba7c1592b7d2d2 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 05:10:38 +0000
Subject: [PATCH 6/7] wording

Created using spr 1.3.7
---
 llvm/lib/Transforms/Utils/MatrixUtils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index ab47289d4dad5..cc4326dd1a071 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -52,8 +52,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
   auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
   if (!ProfcheckDisableMetadataFixes) {
     assert(Step->getZExtValue() != 0 &&
-           "Expected a non-zero step size. Matrices are expected to always be "
-           "finite sized.");
+           "Expected a non-zero step size. This is chosen by the pass and "
+           "should always be non-zero to imply a finite loop.");
     MDBuilder MDB(Preheader->getContext());
     setFittedBranchWeights(
         *BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);

>From 9a3fbf4a44836601e4a487fc18bc5ee3e026c234 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Wed, 18 Feb 2026 02:08:21 +0000
Subject: [PATCH 7/7] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.7

[skip ci]
---
 llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 93167c04e3142..65c6330d7bdd5 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -2474,8 +2474,12 @@ class LowerMatrixIntrinsics {
         MDFrom = Inst;
     }
 
-    for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
+    for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors())) {
+      assert(!(isa<VectorType>(CV->getType()) && static_cast<bool>(MDFrom)) &&
+             "If we have a vector conditional, we should be propagating "
+             "profile information.");
       Result.addVector(Builder.CreateSelect(CV, AV, BV, "", MDFrom));
+    }
 
     return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
                                    Result.getNumVectors());



More information about the llvm-commits mailing list