[llvm] [ProfCheck][Matrix] Add profile data where relevant (PR #181292)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 17 18:51:07 PST 2026
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/181292
>From 9906c842f87c5145adc1bea82101f6ab3b55591b Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 01:57:43 +0000
Subject: [PATCH 1/7] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
[skip ci]
---
llvm/include/llvm/Transforms/Utils/MatrixUtils.h | 7 ++++---
.../Transforms/Scalar/LowerMatrixIntrinsics.cpp | 4 +++-
llvm/lib/Transforms/Utils/MatrixUtils.cpp | 10 +++++-----
.../Transforms/LowerMatrixIntrinsics/select.ll | 16 +++++++++++-----
llvm/utils/profcheck-xfail.txt | 1 -
5 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/MatrixUtils.h b/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
index ffad57002935e..9343f71d1243e 100644
--- a/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/MatrixUtils.h
@@ -18,6 +18,7 @@
namespace llvm {
class DomTreeUpdater;
class BasicBlock;
+class ConstantInt;
class Value;
class Loop;
class LoopInfo;
@@ -80,9 +81,9 @@ struct TileInfo {
/// Exit as exit block. Adds the new loop blocks to \L and applies dominator
/// tree updates to \p DTU.
static BasicBlock *CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
- Value *Bound, Value *Step, StringRef Name,
- IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
- LoopInfo &LI);
+ ConstantInt *Bound, ConstantInt *Step,
+ StringRef Name, IRBuilderBase &B,
+ DomTreeUpdater &DTU, Loop *L, LoopInfo &LI);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index ecf295dc75c3a..053f1d99274ae 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -2459,16 +2459,18 @@ class LowerMatrixIntrinsics {
MatrixTy B = getMatrix(OpB, Shape, Builder);
SmallVector<Value*> CondV;
+ Instruction *MDFrom = nullptr;
if (isa<FixedVectorType>(Cond->getType())) {
MatrixTy C = getMatrix(Cond, Shape, Builder);
llvm::copy(C.vectors(), std::back_inserter(CondV));
} else {
CondV.resize(A.getNumVectors());
llvm::fill(CondV, Cond);
+ MDFrom = Inst;
}
for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
- Result.addVector(Builder.CreateSelect(CV, AV, BV));
+ Result.addVector(Builder.CreateSelect(CV, AV, BV, "", MDFrom));
return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
Result.getNumVectors());
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index 7866d6434c115..a35003c3bc390 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -21,9 +21,9 @@
using namespace llvm;
BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
- Value *Bound, Value *Step, StringRef Name,
- IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L,
- LoopInfo &LI) {
+ ConstantInt *Bound, ConstantInt *Step,
+ StringRef Name, IRBuilderBase &B,
+ DomTreeUpdater &DTU, Loop *L, LoopInfo &LI) {
LLVMContext &Ctx = Preheader->getContext();
BasicBlock *Header = BasicBlock::Create(
Preheader->getContext(), Name + ".header", Preheader->getParent(), Exit);
@@ -35,8 +35,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
Type *I32Ty = Type::getInt64Ty(Ctx);
BranchInst::Create(Body, Header);
BranchInst::Create(Latch, Body);
- PHINode *IV =
- PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());
+ PHINode *IV = PHINode::Create(I32Ty, 2, Name + ".iv",
+ Header->getTerminator()->getIterator());
IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader);
B.SetInsertPoint(Latch);
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
index bd97915759aac..73ac5f8b78704 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
@@ -1,7 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
-define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
+define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) !prof !0 {
; CHECK-LABEL: @select_2x2_bot(
; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16
; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
@@ -9,8 +9,8 @@ define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
; CHECK-NEXT: [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16
; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2
; CHECK-NEXT: [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]]
-; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]]
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]], !prof [[PROF1]]
; CHECK-NEXT: store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 4
; CHECK-NEXT: [[VEC_GEP5:%.*]] = getelementptr float, ptr [[OUT]], i64 2
; CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[VEC_GEP5]], align 4
@@ -18,7 +18,7 @@ define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
;
%lhsv = load <4 x float>, ptr %lhs
%rhsv = load <4 x float>, ptr %rhs
- %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv
+ %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv, !prof !1
call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
ret void
}
@@ -205,3 +205,9 @@ define void @select_2x2_vcond_shape5(ptr %cond, ptr %lhs, ptr %rhs, ptr %out) {
store <4 x float> %op, ptr %out
ret void
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 454f3d8f8fe00..63b1cf9696c65 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -190,7 +190,6 @@ Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
Transforms/LowerMatrixIntrinsics/multiply-fused-loops-large-matrixes.ll
Transforms/LowerMatrixIntrinsics/multiply-fused-multiple-blocks.ll
Transforms/LowerMatrixIntrinsics/multiply-fused-volatile.ll
-Transforms/LowerMatrixIntrinsics/select.ll
Transforms/LowerSwitch/2003-05-01-PHIProblem.ll
Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll
Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
>From b99ff35e3648564d0470a80e36321bf7d313580f Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 02:03:03 +0000
Subject: [PATCH 2/7] tests
Created using spr 1.3.7
---
.../LowerMatrixIntrinsics/multiply-fused.ll | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
index 430358f0a5138..c3a81b79f6de5 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=9999 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s
; REQUIRES: aarch64-registered-target
@@ -8,18 +8,18 @@ target triple = "aarch64-apple-ios"
; Test tiling without generating explicit loops.
-define void @multiply(ptr %A, ptr %B, ptr %C) {
+define void @multiply(ptr %A, ptr %B, ptr %C) !prof !0 {
; CHECK-LABEL: @multiply(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint ptr [[C:%.*]] to i64
; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 128
; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint ptr [[A:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]]
-; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: alias_cont:
; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 128
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]]
-; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]], !prof [[PROF1]]
; CHECK: copy:
; CHECK-NEXT: [[TMP2:%.*]] = alloca [16 x double], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP2]], ptr noundef nonnull align 8 dereferenceable(128) [[A]], i64 128, i1 false)
@@ -30,11 +30,11 @@ define void @multiply(ptr %A, ptr %B, ptr %C) {
; CHECK-NEXT: [[STORE_END5:%.*]] = add nuw nsw i64 [[STORE_BEGIN4]], 128
; CHECK-NEXT: [[LOAD_BEGIN6:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[STORE_END5]], [[LOAD_BEGIN6]]
-; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]], !prof [[PROF1]]
; CHECK: alias_cont1:
; CHECK-NEXT: [[LOAD_END7:%.*]] = add nuw nsw i64 [[LOAD_BEGIN6]], 128
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[LOAD_END7]], [[STORE_BEGIN4]]
-; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]], !prof [[PROF1]]
; CHECK: copy2:
; CHECK-NEXT: [[TMP6:%.*]] = alloca [16 x double], align 8
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP6]], ptr noundef nonnull align 8 dereferenceable(128) [[B]], i64 128, i1 false)
@@ -375,3 +375,8 @@ entry:
}
declare <16 x double> @llvm.matrix.multiply(<16 x double>, <16 x double>, i32, i32, i32)
+
+!0 = !{!"function_entry_count", i64 1000}
+;.
+; CHECK: [[PROF1]] = !{!"unknown", !"lower-matrix-intrinsics"}
+;.
>From f0d3a8f8819d71b8640b1af606470199c85f46ea Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 02:32:01 +0000
Subject: [PATCH 3/7] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
[skip ci]
---
llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 7 ++++++-
llvm/lib/Transforms/Utils/MatrixUtils.cpp | 4 ++--
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 053f1d99274ae..93167c04e3142 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -104,6 +104,10 @@ static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
"in the inner loop of matmul"),
cl::init(0));
+namespace llvm {
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
/// Helper function to either return Scope, if it is a subprogram or the
/// attached subprogram for a local scope.
static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -2466,7 +2470,8 @@ class LowerMatrixIntrinsics {
} else {
CondV.resize(A.getNumVectors());
llvm::fill(CondV, Cond);
- MDFrom = Inst;
+ if (!ProfcheckDisableMetadataFixes)
+ MDFrom = Inst;
}
for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index a35003c3bc390..7bb0cb919f5c9 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -35,8 +35,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
Type *I32Ty = Type::getInt64Ty(Ctx);
BranchInst::Create(Body, Header);
BranchInst::Create(Latch, Body);
- PHINode *IV = PHINode::Create(I32Ty, 2, Name + ".iv",
- Header->getTerminator()->getIterator());
+ PHINode *IV =
+ PHINode::Create(I32Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());
IV->addIncoming(ConstantInt::get(I32Ty, 0), Preheader);
B.SetInsertPoint(Latch);
>From 61e144b9f0add7f71edc60f7d6889516fd2b4150 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 03:17:34 +0000
Subject: [PATCH 4/7] fix test, feedback
Created using spr 1.3.7
---
llvm/lib/Transforms/Utils/MatrixUtils.cpp | 3 +++
.../multiply-fused-loops.ll | 20 +++++++++++--------
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index e84522276219c..262e4c99365d7 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -51,6 +51,9 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
if (!ProfcheckDisableMetadataFixes) {
+ assert(Step->getZExtValue() != 0 &&
+ "Expected a non-zero step size. A step size of zero produces an "
+ "infinite loop which massively skews profile data.");
MDBuilder MDB(Preheader->getContext());
setFittedBranchWeights(
*BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
index 8c6d142550abd..498102f70c42b 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart
; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-loops-threshold=0 -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s
; REQUIRES: aarch64-registered-target
@@ -360,10 +360,14 @@ entry:
}
declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32)
-
-; CHECK: !0 = distinct !{!0, !1}
-; CHECK-NEXT: !1 = !{!"llvm.loop.unroll.count", i32 2}
-; CHECK-NEXT: !2 = distinct !{!2, !1}
-; CHECK-NEXT: !3 = distinct !{!3, !4}
-; CHECK-NEXT: !4 = !{!"llvm.loop.unroll.count", i32 1}
-; CHECK-NEXT: !5 = distinct !{!5, !4}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 2}
+; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.count", i32 2}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]]}
+; CHECK: [[META6]] = !{!"llvm.loop.unroll.count", i32 1}
+; CHECK: [[PROF7]] = !{!"branch_weights", i32 1, i32 4}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]]}
+;.
>From 59cf57eac2f79e3ab3d7a4dc809f9778630497c8 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 05:07:38 +0000
Subject: [PATCH 5/7] feedback
Created using spr 1.3.7
---
llvm/lib/Transforms/Utils/MatrixUtils.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index 262e4c99365d7..ab47289d4dad5 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -52,8 +52,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
if (!ProfcheckDisableMetadataFixes) {
assert(Step->getZExtValue() != 0 &&
- "Expected a non-zero step size. A step size of zero produces an "
- "infinite loop which massively skews profile data.");
+ "Expected a non-zero step size. Matrices are expected to always be "
+ "finite sized.");
MDBuilder MDB(Preheader->getContext());
setFittedBranchWeights(
*BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);
>From e999b50df814cf775a2eb1097dba7c1592b7d2d2 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 13 Feb 2026 05:10:38 +0000
Subject: [PATCH 6/7] wording
Created using spr 1.3.7
---
llvm/lib/Transforms/Utils/MatrixUtils.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/MatrixUtils.cpp b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
index ab47289d4dad5..cc4326dd1a071 100644
--- a/llvm/lib/Transforms/Utils/MatrixUtils.cpp
+++ b/llvm/lib/Transforms/Utils/MatrixUtils.cpp
@@ -52,8 +52,8 @@ BasicBlock *TileInfo::CreateLoop(BasicBlock *Preheader, BasicBlock *Exit,
auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
if (!ProfcheckDisableMetadataFixes) {
assert(Step->getZExtValue() != 0 &&
- "Expected a non-zero step size. Matrices are expected to always be "
- "finite sized.");
+ "Expected a non-zero step size. This is chosen by the pass and "
+ "should always be non-zero to imply a finite loop.");
MDBuilder MDB(Preheader->getContext());
setFittedBranchWeights(
*BR, {Bound->getZExtValue() / Step->getZExtValue(), 1}, false);
>From 9a3fbf4a44836601e4a487fc18bc5ee3e026c234 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Wed, 18 Feb 2026 02:08:21 +0000
Subject: [PATCH 7/7] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
[skip ci]
---
llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 93167c04e3142..65c6330d7bdd5 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -2474,8 +2474,12 @@ class LowerMatrixIntrinsics {
MDFrom = Inst;
}
- for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors()))
+ for (auto [CV, AV, BV] : llvm::zip_equal(CondV, A.vectors(), B.vectors())) {
+ assert(!(isa<VectorType>(CV->getType()) && static_cast<bool>(MDFrom)) &&
+ "If we have a vector conditional, we should be propagating "
+ "profile information.");
Result.addVector(Builder.CreateSelect(CV, AV, BV, "", MDFrom));
+ }
return Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
Result.getNumVectors());
More information about the llvm-commits
mailing list