[llvm] [VectorCombine] Avoid inserting freeze when scalarizing extend-extract if all extracts would lead to UB on poison. (PR #164683)

Julian Nagele via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 22 11:56:15 PDT 2025


https://github.com/juliannagele created https://github.com/llvm/llvm-project/pull/164683

This change avoids inserting a freeze instruction between the load and the bitcast when scalarizing extend-extract, provided that all lanes are extracted and every extract would lead to UB on poison. This is particularly useful in combination with https://github.com/llvm/llvm-project/pull/164682, which can then potentially scalarize further, provided there is no freeze.

>From 553cfa8af6be5685795ac74aa33d1fe900dafabe Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:25:29 +0100
Subject: [PATCH 1/2] [VectorCombine] Add test showing existing behaviour
 adding unneeded freeze.

---
 .../VectorCombine/AArch64/ext-extract.ll      | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index 60700412686ea..fa136b2a5c44b 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -346,3 +346,38 @@ entry:
   call void @use.i32(i32 %ext.3)
   ret void
 }
+
+define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
+; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
+; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
+; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
+; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
+; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
+; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[ADD3]]
+;
+entry:
+  %ext = zext nneg <4 x i8> %src to <4 x i32>
+  %ext.0 = extractelement <4 x i32> %ext, i64 0
+  %ext.1 = extractelement <4 x i32> %ext, i64 1
+  %ext.2 = extractelement <4 x i32> %ext, i64 2
+  %ext.3 = extractelement <4 x i32> %ext, i64 3
+
+  %add1 = add i32 %ext.0, %ext.1
+  %add2 = add i32 %add1, %ext.2
+  %add3 = add i32 %add2, %ext.3
+  ret i32 %add3
+}

>From b8b84d2defa9b45cf1b41ebc93b4b638170e315a Mon Sep 17 00:00:00 2001
From: Julian Nagele <j_nagele at apple.com>
Date: Wed, 22 Oct 2025 15:35:09 +0100
Subject: [PATCH 2/2] [VectorCombine] Avoid inserting freeze when scalarizing
 extend-extract if all extracts would lead to UB on poison.

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 20 ++++++++++++++++--
 .../VectorCombine/AArch64/ext-extract.ll      | 21 +++++++++----------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d6eb00da11dc8..4a0c98e03203d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2017,8 +2017,24 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
 
   Value *ScalarV = Ext->getOperand(0);
   if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
-                                 &DT))
-    ScalarV = Builder.CreateFreeze(ScalarV);
+                                 &DT)) {
+    // Check if all lanes are extracted and all extracts trigger UB on poison.
+    // If so, we do not need to insert a freeze.
+    SmallDenseSet<uint64_t, 8> ExtractedLanes;
+    bool AllExtractsHaveUB = true;
+    for (User *U : Ext->users()) {
+      auto *Extract = cast<ExtractElementInst>(U);
+      uint64_t Idx =
+          cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
+      ExtractedLanes.insert(Idx);
+      if (!programUndefinedIfPoison(Extract)) {
+        AllExtractsHaveUB = false;
+        break;
+      }
+    }
+    if (!AllExtractsHaveUB || ExtractedLanes.size() != SrcTy->getNumElements())
+      ScalarV = Builder.CreateFreeze(ScalarV);
+  }
   ScalarV = Builder.CreateBitCast(
       ScalarV,
       IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
index fa136b2a5c44b..29d4ddd3d0ac8 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll
@@ -351,22 +351,21 @@ define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(<4 x i8> %src) {
 ; CHECK-LABEL: define noundef i32 @zext_v4i8_all_lanes_used_no_freeze(
 ; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP1]], 16
-; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], 255
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i32 [[TMP1]], 8
-; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP5]], 255
-; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP1]], 255
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0]], 16
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 255
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 255
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[TMP0]], 255
 ; CHECK-NEXT:    [[EXT:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
 ; CHECK-NEXT:    [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT]], i64 0
 ; CHECK-NEXT:    [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT]], i64 1
 ; CHECK-NEXT:    [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT]], i64 2
 ; CHECK-NEXT:    [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT]], i64 3
-; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP7]], [[TMP6]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP4]]
-; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP2]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[TMP3]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[ADD3]]
 ;
 entry:



More information about the llvm-commits mailing list