[llvm] [VectorCombine] Avoid inserting freeze when scalarizing extend-extract if all extracts would lead to UB on poison. (PR #164683)
Julian Nagele via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 1 08:49:37 PDT 2025
https://github.com/juliannagele updated https://github.com/llvm/llvm-project/pull/164683
>From 553cfa8af6be5685795ac74aa33d1fe900dafabe Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:25:29 +0100
Subject: [PATCH 1/5] [VectorCombine] Add test showing existing behaviour of
adding an unneeded freeze.
---
.../VectorCombine/AArch64/ext-extract.ll | 35 +++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index 60700412686ea..fa136b2a5c44b 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -346,3 +346,38 @@ entry:
call void @use.i32(i32 %ext.3)
ret void
}
+
+define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+entry:
+ %ext = zext nneg <4 x i8> %src to <4 x i32>
+ %ext.0 = extractelement <4 x i32> %ext, i64 0
+ %ext.1 = extractelement <4 x i32> %ext, i64 1
+ %ext.2 = extractelement <4 x i32> %ext, i64 2
+ %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+ %add1 = add i32 %ext.0, %ext.1
+ %add2 = add i32 %add1, %ext.2
+ %add3 = add i32 %add2, %ext.3
+ ret i32 %add3
+}
>From b8b84d2defa9b45cf1b41ebc93b4b638170e315a Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:35:09 +0100
Subject: [PATCH 2/5] [VectorCombine] Avoid inserting freeze when scalarizing
extend-extract if all extracts would lead to UB on poison.
---
.../Transforms/Vectorize/VectorCombine.cpp | 20 ++++++++++++++++--
.../VectorCombine/AArch64/ext-extract.ll | 21 +++++++++----------
2 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d6eb00da11dc8..4a0c98e03203d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2017,8 +2017,24 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
Value *ScalarV = Ext->getOperand(0);
if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
- &DT))
- ScalarV = Builder.CreateFreeze(ScalarV);
+ &DT)) {
+ // Check if all lanes are extracted and all extracts trigger UB on poison.
+ // If so, we do not need to insert a freeze.
+ SmallDenseSet<uint64_t, 8> ExtractedLanes;
+ bool AllExtractsHaveUB = true;
+ for (User *U : Ext->users()) {
+ auto *Extract = cast<ExtractElementInst>(U);
+ uint64_t Idx =
+ cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
+ ExtractedLanes.insert(Idx);
+ if (!programUndefinedIfPoison(Extract)) {
+ AllExtractsHaveUB = false;
+ break;
+ }
+ }
+ if (!AllExtractsHaveUB || ExtractedLanes.size() != SrcTy->getNumElements())
+ ScalarV = Builder.CreateFreeze(ScalarV);
+ }
ScalarV = Builder.CreateBitCast(
ScalarV,
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index fa136b2a5c44b..29d4ddd3d0ac8 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -351,22 +351,21 @@ define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
-; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
-; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
-; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP3]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
; CHECK-NEXT: ret i32 [[ADD3]]
;
entry:
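
(Not part of the patch, just context for reviewers.) The freeze exists because, after the bitcast, poison in any single lane of the source makes the whole scalar poison, so the shift/mask replacements for every lane would become poison. The argument for dropping it is: if every lane is extracted and every extract already triggers UB whenever it yields poison (for example by feeding a noundef return), then any execution in which the source is poison is undefined anyway, and the unfrozen scalarization cannot change the behaviour of a well-defined execution. A minimal illustrative sketch, with a made-up function that is not taken from the test file:

  define noundef i16 @sum2(<2 x i8> %src) {
    ; Poison in either lane of %src propagates through the add to the
    ; noundef return, which is undefined behaviour.  On every execution
    ; that is not already UB, %src has no poison lanes, so bitcasting the
    ; unfrozen %src and extracting lanes via lshr/and is exact.
    %ext = zext <2 x i8> %src to <2 x i16>
    %e0 = extractelement <2 x i16> %ext, i64 0
    %e1 = extractelement <2 x i16> %ext, i64 1
    %add = add i16 %e0, %e1
    ret i16 %add
  }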
>From 78259eb7f5c917daf6e0c9bc06fcabfdbb1f9bec Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Tue, 28 Oct 2025 17:32:30 +0000
Subject: [PATCH 3/5] fixup! [VectorCombine] Avoid inserting freeze when
scalarizing extend-extract if all extracts would lead to UB on poison.
---
.../Transforms/Vectorize/VectorCombine.cpp | 26 ++-
.../VectorCombine/AArch64/ext-extract.ll | 152 ++++++++++++++++++
2 files changed, 170 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4a0c98e03203d..94a281b8726f9 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2020,19 +2020,29 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
&DT)) {
// Check if all lanes are extracted and all extracts trigger UB on poison.
// If so, we do not need to insert a freeze.
- SmallDenseSet<uint64_t, 8> ExtractedLanes;
- bool AllExtractsHaveUB = true;
+ SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
+ bool AllExtractsTriggerUB = true;
+ ExtractElementInst *LastExtract = nullptr;
+ BasicBlock *ExtBB = Ext->getParent();
for (User *U : Ext->users()) {
auto *Extract = cast<ExtractElementInst>(U);
- uint64_t Idx =
- cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
- ExtractedLanes.insert(Idx);
- if (!programUndefinedIfPoison(Extract)) {
- AllExtractsHaveUB = false;
+ if (Extract->getParent() != ExtBB || !programUndefinedIfPoison(Extract)) {
+ AllExtractsTriggerUB = false;
break;
}
+ ExtractedLanes.insert(cast<ConstantInt>(Extract->getIndexOperand()));
+ if (!LastExtract || LastExtract->comesBefore(Extract))
+ LastExtract = Extract;
}
- if (!AllExtractsHaveUB || ExtractedLanes.size() != SrcTy->getNumElements())
+ // Check execution is guaranteed from extend to last extract.
+ AllExtractsTriggerUB =
+ AllExtractsTriggerUB &&
+ all_of(make_range(Ext->getIterator(), LastExtract->getIterator()),
+ [](Instruction &I) {
+ return isGuaranteedToTransferExecutionToSuccessor(&I);
+ });
+ if (!AllExtractsTriggerUB ||
+ ExtractedLanes.size() != DstTy->getNumElements())
ScalarV = Builder.CreateFreeze(ScalarV);
}
ScalarV = Builder.CreateBitCast(
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index 29d4ddd3d0ac8..e7b11cdf8475e 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -380,3 +380,155 @@ entry:
%add3 = add i32 %add2, %ext.3
ret i32 %add3
}
+
+define noundef i32 @zext_v4i8_not_all_lanes_used(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_not_all_lanes_used(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+entry:
+ %ext = zext nneg <4 x i8> %src to <4 x i32>
+ %ext.0 = extractelement <4 x i32> %ext, i64 0
+ %ext.1 = extractelement <4 x i32> %ext, i64 1
+ %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+ %add1 = add i32 %ext.0, %ext.1
+ %add2 = add i32 %add1, %ext.3
+ ret i32 %add2
+}
+
+define i32 @zext_v4i8_all_lanes_used_no_ub(<4 x i8> %src) {
+; CHECK-LABEL: define i32 @zext_v4i8_all_lanes_used_no_ub(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+entry:
+ %ext = zext nneg <4 x i8> %src to <4 x i32>
+ %ext.0 = extractelement <4 x i32> %ext, i64 0
+ %ext.1 = extractelement <4 x i32> %ext, i64 1
+ %ext.2 = extractelement <4 x i32> %ext, i64 2
+ %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+ %add1 = add i32 %ext.0, %ext.1
+ %add2 = add i32 %add1, %ext.2
+ %add3 = add i32 %add2, %ext.3
+ ret i32 %add3
+}
+
+define noundef i32 @zext_v4i8_extracts_different_blocks(<4 x i8> %src, i1 %cond) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_extracts_different_blocks(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP4]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[PHI]]
+; CHECK-NEXT: ret i32 [[ADD2]]
+;
+entry:
+ %ext = zext nneg <4 x i8> %src to <4 x i32>
+ %ext.0 = extractelement <4 x i32> %ext, i64 0
+ %ext.1 = extractelement <4 x i32> %ext, i64 1
+ br i1 %cond, label %then, label %else
+
+then:
+ %ext.2 = extractelement <4 x i32> %ext, i64 2
+ br label %exit
+
+else:
+ %ext.3 = extractelement <4 x i32> %ext, i64 3
+ br label %exit
+
+exit:
+ %phi = phi i32 [ %ext.2, %then ], [ %ext.3, %else ]
+ %add1 = add i32 %ext.0, %ext.1
+ %add2 = add i32 %add1, %phi
+ ret i32 %add2
+}
+
+
+declare void @may_throw() willreturn
+
+define noundef i32 @zext_v4i8_throwing_call_between(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_throwing_call_between(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT: call void @may_throw()
+; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+entry:
+ %ext = zext nneg <4 x i8> %src to <4 x i32>
+ %ext.0 = extractelement <4 x i32> %ext, i64 0
+ %ext.1 = extractelement <4 x i32> %ext, i64 1
+ %ext.2 = extractelement <4 x i32> %ext, i64 2
+ call void @may_throw()
+ %ext.3 = extractelement <4 x i32> %ext, i64 3
+ %add1 = add i32 %ext.0, %ext.1
+ %add2 = add i32 %add1, %ext.2
+ %add3 = add i32 %add2, %ext.3
+ ret i32 %add3
+}
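
(Again illustrative only; the function below is invented for this mail and is not in the test file.) The not-all-lanes restriction matters because an unextracted lane may be poison while the program stays well defined; without the freeze, that poison would leak through the bitcast into the scalar replacements for the lanes that are extracted:

  define noundef i16 @lane1_never_extracted(<2 x i8> %src) {
    ; Lane 1 is never extracted, so it may be poison in a well-defined
    ; program.  Bitcasting an unfrozen %src to i16 would then make the
    ; replacement for lane 0 poison as well, so the freeze has to stay.
    %ext = zext <2 x i8> %src to <2 x i16>
    %e0 = extractelement <2 x i16> %ext, i64 0
    ret i16 %e0
  }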
>From 4b931b523d9828d90e02037d94fe620dc44dad78 Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Thu, 30 Oct 2025 21:35:04 +0000
Subject: [PATCH 4/5] fixup! fixup! [VectorCombine] Avoid inserting freeze when
scalarizing extend-extract if all extracts would lead to UB on poison.
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 94a281b8726f9..32ce6b3cb2d59 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2034,7 +2034,8 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
if (!LastExtract || LastExtract->comesBefore(Extract))
LastExtract = Extract;
}
- // Check execution is guaranteed from extend to last extract.
+ // Check that the last extract (and hence all previous ones) are guaranteed
+ // to execute if Ext executes.
AllExtractsTriggerUB =
AllExtractsTriggerUB &&
all_of(make_range(Ext->getIterator(), LastExtract->getIterator()),
>From 610cc37233ca661b6f147d5088d36a3cf10438bb Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Sat, 1 Nov 2025 15:49:10 +0000
Subject: [PATCH 5/5] fixup! fixup! fixup! [VectorCombine] Avoid inserting
freeze when scalarizing extend-extract if all extracts would lead to UB on
poison.
---
.../lib/Transforms/Vectorize/VectorCombine.cpp | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 32ce6b3cb2d59..afc02b24368f3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2018,7 +2018,9 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
Value *ScalarV = Ext->getOperand(0);
if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
&DT)) {
- // Check if all lanes are extracted and all extracts trigger UB on poison.
+ // Check whether all lanes are extracted, all extracts trigger UB on
+ // poison, and the last extract (and hence all previous ones)
+ // is guaranteed to execute if Ext executes.
// If so, we do not need to insert a freeze.
SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
bool AllExtractsTriggerUB = true;
@@ -2034,16 +2036,10 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
if (!LastExtract || LastExtract->comesBefore(Extract))
LastExtract = Extract;
}
- // Check that the last extract (and hence all previous ones) are guaranteed
- // to execute if Ext executes.
- AllExtractsTriggerUB =
- AllExtractsTriggerUB &&
- all_of(make_range(Ext->getIterator(), LastExtract->getIterator()),
- [](Instruction &I) {
- return isGuaranteedToTransferExecutionToSuccessor(&I);
- });
- if (!AllExtractsTriggerUB ||
- ExtractedLanes.size() != DstTy->getNumElements())
+ if (ExtractedLanes.size() != DstTy->getNumElements() ||
+ !AllExtractsTriggerUB ||
+ !isGuaranteedToTransferExecutionToSuccessor(Ext->getIterator(),
+ LastExtract->getIterator()))
ScalarV = Builder.CreateFreeze(ScalarV);
}
ScalarV = Builder.CreateBitCast(
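
(One last illustrative note, not part of the patch.) The execution-guarantee check is needed because the "poison implies UB" argument only holds if the UB-triggering extracts are actually reached whenever the extend executes. If an instruction between the extend and the last extract may unwind or not return, a poison source no longer guarantees UB on that path, so the freeze is kept. A rough sketch, assuming @maybe_unwinds is some external function that is neither nounwind nor willreturn:

  declare void @maybe_unwinds()

  define noundef i16 @call_in_between(<2 x i8> %src) {
    ; If the call unwinds or never returns, %e1 and the noundef return are
    ; never reached, so poison in %src does not guarantee UB here and the
    ; transform must still freeze %src before the bitcast.
    %ext = zext <2 x i8> %src to <2 x i16>
    %e0 = extractelement <2 x i16> %ext, i64 0
    call void @maybe_unwinds()
    %e1 = extractelement <2 x i16> %ext, i64 1
    %add = add i16 %e0, %e1
    ret i16 %add
  }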