[llvm] [VectorCombine] Avoid inserting freeze when scalarizing extend-extract if all extracts would lead to UB on poison. (PR #164683)
Julian Nagele via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 22 11:56:15 PDT 2025
https://github.com/juliannagele created https://github.com/llvm/llvm-project/pull/164683
This change aims to avoid inserting a freeze instruction between the load and bitcast when scalarizing extend-extract. This is particularly useful in combination with https://github.com/llvm/llvm-project/pull/164682, which can then potentially further scalarize, provided there is no freeze.
>From 553cfa8af6be5685795ac74aa33d1fe900dafabe Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:25:29 +0100
Subject: [PATCH 1/2] [VectorCombine] Add test showing existing behaviour
adding unneeded freeze.
---
.../VectorCombine/AArch64/ext-extract.ll | 35 +++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index 60700412686ea..fa136b2a5c44b 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -346,3 +346,38 @@ entry:
call void @use.i32(i32 %ext.3)
ret void
}
+
+define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[ADD3]]
+;
+entry:
+ %ext = zext nneg <4 x i8> %src to <4 x i32>
+ %ext.0 = extractelement <4 x i32> %ext, i64 0
+ %ext.1 = extractelement <4 x i32> %ext, i64 1
+ %ext.2 = extractelement <4 x i32> %ext, i64 2
+ %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+ %add1 = add i32 %ext.0, %ext.1
+ %add2 = add i32 %add1, %ext.2
+ %add3 = add i32 %add2, %ext.3
+ ret i32 %add3
+}
>From b8b84d2defa9b45cf1b41ebc93b4b638170e315a Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:35:09 +0100
Subject: [PATCH 2/2] [VectorCombine] Avoid inserting freeze when scalarizing
extend-extract if all extracts would lead to UB on poison.
---
.../Transforms/Vectorize/VectorCombine.cpp | 20 ++++++++++++++++--
.../VectorCombine/AArch64/ext-extract.ll | 21 +++++++++----------
2 files changed, 28 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d6eb00da11dc8..4a0c98e03203d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2017,8 +2017,24 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
Value *ScalarV = Ext->getOperand(0);
if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
- &DT))
- ScalarV = Builder.CreateFreeze(ScalarV);
+ &DT)) {
+ // Check if all lanes are extracted and all extracts trigger UB on poison.
+ // If so, we do not need to insert a freeze.
+ SmallDenseSet<uint64_t, 8> ExtractedLanes;
+ bool AllExtractsHaveUB = true;
+ for (User *U : Ext->users()) {
+ auto *Extract = cast<ExtractElementInst>(U);
+ uint64_t Idx =
+ cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
+ ExtractedLanes.insert(Idx);
+ if (!programUndefinedIfPoison(Extract)) {
+ AllExtractsHaveUB = false;
+ break;
+ }
+ }
+ if (!AllExtractsHaveUB || ExtractedLanes.size() != SrcTy->getNumElements())
+ ScalarV = Builder.CreateFreeze(ScalarV);
+ }
ScalarV = Builder.CreateBitCast(
ScalarV,
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index fa136b2a5c44b..29d4ddd3d0ac8 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -351,22 +351,21 @@ define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
-; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
-; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP0]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP0]], 255
; CHECK-NEXT: [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
-; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
-; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP3]]
+; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
; CHECK-NEXT: ret i32 [[ADD3]]
;
entry:
More information about the llvm-commits
mailing list