[llvm] [SandboxVec][BottomUpVec] Improve handling of external uses (PR #185468)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 10:45:36 PDT 2026
https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/185468
>From 99318b47f94cf91507fbbf8656c85b90d78a2be1 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Fri, 27 Feb 2026 21:20:34 +0000
Subject: [PATCH] [SandboxVec][BottomUpVec] Improve handling of external uses
Until now, the bottom-up vectorizer pass could not delete scalar instructions
that still had external uses after vectorization, because it lacked the ability
to generate extracts from the vectors.
With the term "external uses", we refer to uses outside the currently vectorized graph.
This patch fixes this. We can now properly handle external uses by extracting from the vectors.
This change relies on the recent changes to the DAG's callbacks because the external user
may not be within the current DAG's interval.
---
.../Vectorize/SandboxVectorizer/InstrMaps.h | 4 ++
.../SandboxVectorizer/Passes/BottomUpVec.h | 3 ++
.../SandboxVectorizer/Passes/BottomUpVec.cpp | 48 +++++++++++++++++++
.../SandboxVectorizer/bottomup_basic.ll | 17 ++++---
.../SandboxVectorizer/external_uses.ll | 3 +-
.../Transforms/SandboxVectorizer/scheduler.ll | 3 +-
.../SandboxVectorizer/InstrMapsTest.cpp | 4 ++
7 files changed, 69 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
index 050396674e159..45b6d1405a8b3 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/InstrMaps.h
@@ -61,6 +61,10 @@ class InstrMaps {
public:
InstrMaps() = default;
~InstrMaps() = default;
+ /// \Returns true if \p Orig is a key in OrigToVectorMap, i.e. was vectorized.
+ bool isVectorized(Value *Orig) const {
+ return OrigToVectorMap.contains(Orig);
+ }
/// \Returns the vector value that we got from vectorizing \p Orig, or
/// nullptr if not found.
Action *getVectorForOrig(Value *Orig) const {
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
index c65e2cd45ab48..126795736428e 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
@@ -88,6 +88,9 @@ class BottomUpVec final : public RegionPass {
/// `Actions` vector.
Action *vectorizeRec(ArrayRef<Value *> Bndl, ArrayRef<Value *> UserBndl,
unsigned Depth, LegalityAnalysis &Legality);
+ /// If the values in \p Bndl have external users, then emit unpacks and
+ /// connect them to the users. \p Vec is the vectorized form of \p Bndl.
+ void emitUnpacksForExternalUses(const ArrayRef<Value *> Bndl, Value *Vec);
/// Generate vector instructions based on `Actions` and return the last vector
/// created.
Value *emitVectors();
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index 5534da902b968..ca187df47d5b7 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -339,6 +339,51 @@ void BottomUpVec::ActionsVector::print(raw_ostream &OS) const {
void BottomUpVec::ActionsVector::dump() const { print(dbgs()); }
#endif // NDEBUG
+/// Emit an extract from \p Vec for each element of \p Bndl used externally.
+void BottomUpVec::emitUnpacksForExternalUses(const ArrayRef<Value *> Bndl,
+                                             Value *Vec) {
+  // Find where we should emit the unpacks.
+  BasicBlock::iterator WhereIt;
+  if (auto *VecI = dyn_cast<Instruction>(Vec)) {
+    WhereIt = std::next(VecI->getIterator());
+  } else {
+    // If Vec is a constant then it should be safe to emit the unpacks at the
+    // top of the block.
+    assert(isa<Constant>(Vec) && "Expected constant!");
+    assert(isa<Instruction>(Bndl[0]) &&
+           "A widened Bndl should contain instrs!");
+    BasicBlock *BB = cast<Instruction>(Bndl[0])->getParent();
+    WhereIt =
+        BB->empty()
+            ? BB->begin()
+            : std::next(
+                  VecUtils::getLastPHIOrSelf(&*BB->begin())->getIterator());
+  }
+  Context &Ctx = Bndl[0]->getContext();
+  for (auto [Idx, Elm] : enumerate(Bndl)) {
+    // Skip elements whose users were all vectorized. Decide this up front,
+    // because replaceAllUsesWith() below rewrites the use list.
+    if (all_of(Elm->users(),
+               [this](User *U) { return IMaps->isVectorized(U); }))
+      continue;
+    // An element can be either scalar or vector. We need to generate
+    // different IR for each case.
+    if (Elm->getType()->isVectorTy())
+      llvm_unreachable("Unimplemented");
+    // The extract lane is the element's position in Bndl.
+    Constant *ExtractLaneC =
+        ConstantInt::getSigned(Type::getInt32Ty(Ctx), Idx);
+    // This may be folded into a Constant if Vec is a Constant.
+    auto *Extract =
+        ExtractElementInst::create(Vec, ExtractLaneC, WhereIt, Ctx, "UnPack");
+    // Move WhereIt to prepare for the next Extract.
+    if (auto *ExtractI = dyn_cast<Instruction>(Extract))
+      WhereIt = std::next(ExtractI->getIterator());
+    // One extract serves every user of this element.
+    Elm->replaceAllUsesWith(Extract);
+  }
+}
+
Value *BottomUpVec::emitVectors() {
Value *NewVec = nullptr;
for (const auto &ActionPtr : Actions) {
@@ -376,6 +421,9 @@ Value *BottomUpVec::emitVectors() {
// original scalars and pointer operands of loads/stores.
if (NewVec != nullptr)
collectPotentiallyDeadInstrs(Bndl);
+
+ // Emit unpacks for all external uses, if any.
+ emitUnpacksForExternalUses(ActionPtr->Bndl, NewVec);
break;
}
case LegalityResultID::DiamondReuse: {
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index b1b8e94e98198..845aeddbe3f84 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -187,10 +187,9 @@ define float @scalars_with_external_uses_not_dead(ptr %ptr) {
; CHECK-LABEL: define float @scalars_with_external_uses_not_dead(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
-; CHECK-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
-; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
-; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4, !sandboxvec [[META5:![0-9]+]]
+; CHECK-NEXT: [[LD0:%.*]] = extractelement <2 x float> [[VECL]], i32 0, !sandboxvec [[META5]]
+; CHECK-NEXT: [[LD1:%.*]] = extractelement <2 x float> [[VECL]], i32 1, !sandboxvec [[META5]]
; CHECK-NEXT: store <2 x float> [[VECL]], ptr [[PTR0]], align 4, !sandboxvec [[META5]]
; CHECK-NEXT: [[USER:%.*]] = fneg float [[LD1]]
; CHECK-NEXT: ret float [[LD0]]
@@ -198,10 +197,10 @@ define float @scalars_with_external_uses_not_dead(ptr %ptr) {
; REVERT-LABEL: define float @scalars_with_external_uses_not_dead(
; REVERT-SAME: ptr [[PTR:%.*]]) {
; REVERT-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
-; REVERT-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
-; REVERT-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
-; REVERT-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
-; REVERT-NEXT: store float [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META5:![0-9]+]]
+; REVERT-NEXT: [[PTR1:%.*]] = getelementptr float, ptr [[PTR]], i32 1, !sandboxvec [[META5:![0-9]+]]
+; REVERT-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4, !sandboxvec [[META5]]
+; REVERT-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4, !sandboxvec [[META5]]
+; REVERT-NEXT: store float [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META5]]
; REVERT-NEXT: store float [[LD1]], ptr [[PTR1]], align 4, !sandboxvec [[META5]]
; REVERT-NEXT: [[USER:%.*]] = fneg float [[LD1]]
; REVERT-NEXT: ret float [[LD0]]
@@ -513,8 +512,8 @@ define void @diamondWithConstantVector(ptr %ptr) {
; REVERT-NEXT: [[GEPA1:%.*]] = getelementptr i32, ptr [[PTR]], i64 1, !sandboxvec [[META14:![0-9]+]]
; REVERT-NEXT: [[GEPB0:%.*]] = getelementptr i32, ptr [[PTR]], i64 10
; REVERT-NEXT: [[GEPB1:%.*]] = getelementptr i32, ptr [[PTR]], i64 11, !sandboxvec [[META15:![0-9]+]]
-; REVERT-NEXT: [[ZEXT0:%.*]] = zext i16 0 to i32
-; REVERT-NEXT: [[ZEXT1:%.*]] = zext i16 0 to i32
+; REVERT-NEXT: [[ZEXT0:%.*]] = zext i16 0 to i32, !sandboxvec [[META14]]
+; REVERT-NEXT: [[ZEXT1:%.*]] = zext i16 0 to i32, !sandboxvec [[META14]]
; REVERT-NEXT: store i32 [[ZEXT0]], ptr [[GEPA0]], align 4, !sandboxvec [[META14]]
; REVERT-NEXT: store i32 [[ZEXT1]], ptr [[GEPA1]], align 4, !sandboxvec [[META14]]
; REVERT-NEXT: [[ORB0:%.*]] = or i32 0, [[ZEXT0]], !sandboxvec [[META15]]
diff --git a/llvm/test/Transforms/SandboxVectorizer/external_uses.ll b/llvm/test/Transforms/SandboxVectorizer/external_uses.ll
index e76441a332b91..4f422a9a199b8 100644
--- a/llvm/test/Transforms/SandboxVectorizer/external_uses.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/external_uses.ll
@@ -7,10 +7,9 @@ define void @external_users(ptr %ptr) {
; CHECK-LABEL: define void @external_users(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
-; CHECK-NEXT: [[LD0:%.*]] = load float, ptr [[PTR0]], align 4
; CHECK-NEXT: [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4, !sandboxvec [[META0:![0-9]+]]
-; CHECK-NEXT: [[SUB0:%.*]] = fsub float [[LD0]], 0.000000e+00
; CHECK-NEXT: [[VEC:%.*]] = fsub <2 x float> [[VECL]], zeroinitializer, !sandboxvec [[META0]]
+; CHECK-NEXT: [[SUB0:%.*]] = extractelement <2 x float> [[VEC]], i32 0, !sandboxvec [[META0]]
; CHECK-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
; CHECK-NEXT: [[USER:%.*]] = fneg float [[SUB0]]
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index f4500935c4891..86016b86f0830 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -57,8 +57,7 @@ define <4 x float> @check_top_of_schedule(ptr %0) {
; CHECK-SAME: ptr [[TMP0:%.*]]) {
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
-; CHECK-NEXT: [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float
-; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0
; CHECK-NEXT: store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8, !sandboxvec [[META1:![0-9]+]]
; CHECK-NEXT: ret <4 x float> [[INS_2]]
;
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
index c8fee1c24dbcb..69993e05bbd4d 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/InstrMapsTest.cpp
@@ -59,6 +59,8 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) {
sandboxir::Action A(nullptr, {Add0}, {}, 0);
EXPECT_EQ(IMaps.getVectorForOrig(Add0), nullptr);
EXPECT_EQ(IMaps.getVectorForOrig(Add1), nullptr);
+ EXPECT_FALSE(IMaps.isVectorized(Add0));
+ EXPECT_FALSE(IMaps.isVectorized(Add1));
EXPECT_FALSE(IMaps.getOrigLane(&A, Add0));
}
{
@@ -68,6 +70,8 @@ define void @foo(i8 %v0, i8 %v1, i8 %v2, i8 %v3, <2 x i8> %vec) {
IMaps.registerVector({Add0, Add1}, &A);
EXPECT_EQ(IMaps.getVectorForOrig(Add0), &A);
EXPECT_EQ(IMaps.getVectorForOrig(Add1), &A);
+ EXPECT_TRUE(IMaps.isVectorized(Add0));
+ EXPECT_TRUE(IMaps.isVectorized(Add1));
EXPECT_FALSE(IMaps.getOrigLane(&A, VAdd0)); // Bad Orig value
EXPECT_FALSE(IMaps.getOrigLane(&OtherA, Add0)); // Bad Vector value
EXPECT_EQ(*IMaps.getOrigLane(&A, Add0), 0U);
More information about the llvm-commits
mailing list