[llvm] [VectorCombine] Avoid inserting freeze when scalarizing extend-extract if all extracts would lead to UB on poison. (PR #164683)

Julian Nagele via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 4 03:38:48 PST 2025


https://github.com/juliannagele updated https://github.com/llvm/llvm-project/pull/164683

>From 553cfa8af6be5685795ac74aa33d1fe900dafabe Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:25:29 +0100
Subject: [PATCH 1/6] [VectorCombine] Add test showing existing behaviour
 adding unneeded freeze.

---
 .../VectorCombine/AArch64/ext-extract.ll      | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index 60700412686ea..fa136b2a5c44b 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -346,3 +346,38 @@ entry:
   call void @use.i32(i32 %ext.3)
   ret void
 }
+
+define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[ADD3]]
+;
+entry:
+  %ext = zext nneg <4 x i8> %src to <4 x i32>
+  %ext.0 = extractelement <4 x i32> %ext, i64 0
+  %ext.1 = extractelement <4 x i32> %ext, i64 1
+  %ext.2 = extractelement <4 x i32> %ext, i64 2
+  %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+  %add1 = add i32 %ext.0, %ext.1
+  %add2 = add i32 %add1, %ext.2
+  %add3 = add i32 %add2, %ext.3
+  ret i32 %add3
+}

>From b8b84d2defa9b45cf1b41ebc93b4b638170e315a Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:35:09 +0100
Subject: [PATCH 2/6] [VectorCombine] Avoid inserting freeze when scalarizing
 extend-extract if all extracts would lead to UB on poison.

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 20 ++++++++++++++++--
 .../VectorCombine/AArch64/ext-extract.ll      | 21 +++++++++----------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d6eb00da11dc8..4a0c98e03203d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2017,8 +2017,24 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
 
   Value *ScalarV = Ext->getOperand(0);
   if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
-                                 &DT))
-    ScalarV = Builder.CreateFreeze(ScalarV);
+                                 &DT)) {
+    // Check if all lanes are extracted and all extracts trigger UB on poison.
+    // If so, we do not need to insert a freeze.
+    SmallDenseSet<uint64_t, 8> ExtractedLanes;
+    bool AllExtractsHaveUB = true;
+    for (User *U : Ext->users()) {
+      auto *Extract = cast<ExtractElementInst>(U);
+      uint64_t Idx =
+          cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
+      ExtractedLanes.insert(Idx);
+      if (!programUndefinedIfPoison(Extract)) {
+        AllExtractsHaveUB = false;
+        break;
+      }
+    }
+    if (!AllExtractsHaveUB || ExtractedLanes.size() != SrcTy->getNumElements())
+      ScalarV = Builder.CreateFreeze(ScalarV);
+  }
   ScalarV = Builder.CreateBitCast(
       ScalarV,
       IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index fa136b2a5c44b..29d4ddd3d0ac8 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -351,22 +351,21 @@ define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
 ; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
 ; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
-; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
-; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0]], 16
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 255
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 255
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP0]], 255
 ; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
 ; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
 ; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
 ; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
 ; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
-; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
-; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP3]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD3]]
 ;
 entry:

>From 78259eb7f5c917daf6e0c9bc06fcabfdbb1f9bec Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Tue, 28 Oct 2025 17:32:30 +0000
Subject: [PATCH 3/6] fixup! [VectorCombine] Avoid inserting freeze when
 scalarizing extend-extract if all extracts would lead to UB on poison.

---
 .../Transforms/Vectorize/VectorCombine.cpp    |  26 ++-
 .../VectorCombine/AArch64/ext-extract.ll      | 152 ++++++++++++++++++
 2 files changed, 170 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4a0c98e03203d..94a281b8726f9 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2020,19 +2020,29 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
                                  &DT)) {
     // Check if all lanes are extracted and all extracts trigger UB on poison.
     // If so, we do not need to insert a freeze.
-    SmallDenseSet<uint64_t, 8> ExtractedLanes;
-    bool AllExtractsHaveUB = true;
+    SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
+    bool AllExtractsTriggerUB = true;
+    ExtractElementInst *LastExtract = nullptr;
+    BasicBlock *ExtBB = Ext->getParent();
     for (User *U : Ext->users()) {
       auto *Extract = cast<ExtractElementInst>(U);
-      uint64_t Idx =
-          cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
-      ExtractedLanes.insert(Idx);
-      if (!programUndefinedIfPoison(Extract)) {
-        AllExtractsHaveUB = false;
+      if (Extract->getParent() != ExtBB || !programUndefinedIfPoison(Extract)) {
+        AllExtractsTriggerUB = false;
         break;
       }
+      ExtractedLanes.insert(cast<ConstantInt>(Extract->getIndexOperand()));
+      if (!LastExtract || LastExtract->comesBefore(Extract))
+        LastExtract = Extract;
     }
-    if (!AllExtractsHaveUB || ExtractedLanes.size() != SrcTy->getNumElements())
+    // Check execution is guaranteed from extend to last extract.
+    AllExtractsTriggerUB =
+        AllExtractsTriggerUB &&
+        all_of(make_range(Ext->getIterator(), LastExtract->getIterator()),
+               [](Instruction &I) {
+                 return isGuaranteedToTransferExecutionToSuccessor(&I);
+               });
+    if (!AllExtractsTriggerUB ||
+        ExtractedLanes.size() != DstTy->getNumElements())
       ScalarV = Builder.CreateFreeze(ScalarV);
   }
   ScalarV = Builder.CreateBitCast(
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index 29d4ddd3d0ac8..e7b11cdf8475e 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -380,3 +380,155 @@ entry:
   %add3 = add i32 %add2, %ext.3
   ret i32 %add3
 }
+
+define noundef i32 @zext_v4i8_not_all_lanes_used(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_not_all_lanes_used(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 255
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP0]], 255
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[ADD3]]
+;
+entry:
+  %ext = zext nneg <4 x i8> %src to <4 x i32>
+  %ext.0 = extractelement <4 x i32> %ext, i64 0
+  %ext.1 = extractelement <4 x i32> %ext, i64 1
+  %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+  %add1 = add i32 %ext.0, %ext.1
+  %add2 = add i32 %add1, %ext.3
+  ret i32 %add2
+}
+
+define i32 @zext_v4i8_all_lanes_used_no_ub(<4 x i8> %src) {
+; CHECK-LABEL: define i32 @zext_v4i8_all_lanes_used_no_ub(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[ADD3]]
+;
+entry:
+  %ext = zext nneg <4 x i8> %src to <4 x i32>
+  %ext.0 = extractelement <4 x i32> %ext, i64 0
+  %ext.1 = extractelement <4 x i32> %ext, i64 1
+  %ext.2 = extractelement <4 x i32> %ext, i64 2
+  %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+  %add1 = add i32 %ext.0, %ext.1
+  %add2 = add i32 %add1, %ext.2
+  %add3 = add i32 %add2, %ext.3
+  ret i32 %add3
+}
+
+define noundef i32 @zext_v4i8_extracts_different_blocks(<4 x i8> %src, i1 %cond) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_extracts_different_blocks(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[TMP4]], %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[PHI]]
+; CHECK-NEXT:    ret i32 [[ADD2]]
+;
+entry:
+  %ext = zext nneg <4 x i8> %src to <4 x i32>
+  %ext.0 = extractelement <4 x i32> %ext, i64 0
+  %ext.1 = extractelement <4 x i32> %ext, i64 1
+  br i1 %cond, label %then, label %else
+
+then:
+  %ext.2 = extractelement <4 x i32> %ext, i64 2
+  br label %exit
+
+else:
+  %ext.3 = extractelement <4 x i32> %ext, i64 3
+  br label %exit
+
+exit:
+  %phi = phi i32 [ %ext.2, %then ], [ %ext.3, %else ]
+  %add1 = add i32 %ext.0, %ext.1
+  %add2 = add i32 %add1, %phi
+  ret i32 %add2
+}
+
+
+declare void @may_throw() willreturn
+
+define noundef i32 @zext_v4i8_throwing_call_between(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_throwing_call_between(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT:    call void @may_throw()
+; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[ADD3]]
+;
+entry:
+  %ext = zext nneg <4 x i8> %src to <4 x i32>
+  %ext.0 = extractelement <4 x i32> %ext, i64 0
+  %ext.1 = extractelement <4 x i32> %ext, i64 1
+  %ext.2 = extractelement <4 x i32> %ext, i64 2
+  call void @may_throw()
+  %ext.3 = extractelement <4 x i32> %ext, i64 3
+  %add1 = add i32 %ext.0, %ext.1
+  %add2 = add i32 %add1, %ext.2
+  %add3 = add i32 %add2, %ext.3
+  ret i32 %add3
+}

>From 4b931b523d9828d90e02037d94fe620dc44dad78 Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Thu, 30 Oct 2025 21:35:04 +0000
Subject: [PATCH 4/6] fixup! fixup! [VectorCombine] Avoid inserting freeze when
 scalarizing extend-extract if all extracts would lead to UB on poison.

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 94a281b8726f9..32ce6b3cb2d59 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2034,7 +2034,8 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
       if (!LastExtract || LastExtract->comesBefore(Extract))
         LastExtract = Extract;
     }
-    // Check execution is guaranteed from extend to last extract.
+    // Check that the last extract (and hence all previous ones) are guaranteed
+    // to execute if Ext executes.
     AllExtractsTriggerUB =
         AllExtractsTriggerUB &&
         all_of(make_range(Ext->getIterator(), LastExtract->getIterator()),

>From 610cc37233ca661b6f147d5088d36a3cf10438bb Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Sat, 1 Nov 2025 15:49:10 +0000
Subject: [PATCH 5/6] fixup! fixup! fixup! [VectorCombine] Avoid inserting
 freeze when scalarizing extend-extract if all extracts would lead to UB on
 poison.

---
 .../lib/Transforms/Vectorize/VectorCombine.cpp | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 32ce6b3cb2d59..afc02b24368f3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2018,7 +2018,9 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
   Value *ScalarV = Ext->getOperand(0);
   if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
                                  &DT)) {
-    // Check if all lanes are extracted and all extracts trigger UB on poison.
+    // Check wether all lanes are extracted, all extracts trigger UB on
+    // poison, and the last extract (and hence all previous ones)
+    // are guaranteed to execute if Ext executes.
     // If so, we do not need to insert a freeze.
     SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
     bool AllExtractsTriggerUB = true;
@@ -2034,16 +2036,10 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
       if (!LastExtract || LastExtract->comesBefore(Extract))
         LastExtract = Extract;
     }
-    // Check that the last extract (and hence all previous ones) are guaranteed
-    // to execute if Ext executes.
-    AllExtractsTriggerUB =
-        AllExtractsTriggerUB &&
-        all_of(make_range(Ext->getIterator(), LastExtract->getIterator()),
-               [](Instruction &I) {
-                 return isGuaranteedToTransferExecutionToSuccessor(&I);
-               });
-    if (!AllExtractsTriggerUB ||
-        ExtractedLanes.size() != DstTy->getNumElements())
+    if (ExtractedLanes.size() != DstTy->getNumElements() ||
+        !AllExtractsTriggerUB ||
+        !isGuaranteedToTransferExecutionToSuccessor(Ext->getIterator(),
+                                                    LastExtract->getIterator()))
       ScalarV = Builder.CreateFreeze(ScalarV);
   }
   ScalarV = Builder.CreateBitCast(

>From c197ff6d7f8d401a1fd7fb4a990c4b83b8dc66b6 Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Tue, 4 Nov 2025 11:34:28 +0000
Subject: [PATCH 6/6] fixup! fixup! fixup! fixup! [VectorCombine] Avoid
 inserting freeze when scalarizing extend-extract if all extracts would lead
 to UB on poison.

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index afc02b24368f3..27a8bbd5776be 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2018,10 +2018,10 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
   Value *ScalarV = Ext->getOperand(0);
   if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
                                  &DT)) {
-    // Check wether all lanes are extracted, all extracts trigger UB on
-    // poison, and the last extract (and hence all previous ones)
-    // are guaranteed to execute if Ext executes.
-    // If so, we do not need to insert a freeze.
+    // Check whether all lanes are extracted, all extracts trigger UB
+    // on poison, and the last extract (and hence all previous ones)
+    // are guaranteed to execute if Ext executes.  If so, we do not
+    // need to insert a freeze.
     SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
     bool AllExtractsTriggerUB = true;
     ExtractElementInst *LastExtract = nullptr;



More information about the llvm-commits mailing list