[llvm] [VectorCombine] Fix crash when folding select of bitcast (PR #177183)

Mitch Briles via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 27 21:15:28 PST 2026


https://github.com/MitchBriles updated https://github.com/llvm/llvm-project/pull/177183

>From a1f61bba2afe02946fc9842ea05ce944b6fda3cd Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Wed, 21 Jan 2026 07:45:34 -0700
Subject: [PATCH 1/9] Create fold-selects-from-bitcast.ll

---
 .../fold-selects-from-bitcast.ll              | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll

diff --git a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
new file mode 100644
index 0000000000000..17f58c8717a2b
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=vector-combine -S %s | FileCheck %s
+
+define i8 @fold_selects_from_bitcast_dominance() {
+; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> splat (i32 1) to <8 x i8>
+; CHECK-NEXT:    [[E0:%.*]] = extractelement <8 x i8> [[BC]], i64 0
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 0, 0
+; CHECK-NEXT:    [[E1:%.*]] = extractelement <8 x i8> [[BC]], i64 1
+; CHECK-NEXT:    [[E2:%.*]] = extractelement <8 x i8> [[BC]], i64 2
+; CHECK-NEXT:    [[E3:%.*]] = extractelement <8 x i8> [[BC]], i64 3
+; CHECK-NEXT:    [[E4:%.*]] = extractelement <8 x i8> [[BC]], i64 4
+; CHECK-NEXT:    [[E5:%.*]] = extractelement <8 x i8> [[BC]], i64 5
+; CHECK-NEXT:    [[E6:%.*]] = extractelement <8 x i8> [[BC]], i64 6
+; CHECK-NEXT:    [[E7:%.*]] = extractelement <8 x i8> [[BC]], i64 7
+; CHECK-NEXT:    [[S0:%.*]] = select i1 [[COND]], i8 [[E0]], i8 0
+; CHECK-NEXT:    [[S1:%.*]] = select i1 [[COND]], i8 [[E1]], i8 0
+; CHECK-NEXT:    [[S2:%.*]] = select i1 [[COND]], i8 [[E2]], i8 0
+; CHECK-NEXT:    [[S3:%.*]] = select i1 [[COND]], i8 [[E3]], i8 0
+; CHECK-NEXT:    [[S4:%.*]] = select i1 [[COND]], i8 [[E4]], i8 0
+; CHECK-NEXT:    [[S5:%.*]] = select i1 [[COND]], i8 [[E5]], i8 0
+; CHECK-NEXT:    [[S6:%.*]] = select i1 [[COND]], i8 [[E6]], i8 0
+; CHECK-NEXT:    [[S7:%.*]] = select i1 [[COND]], i8 [[E7]], i8 0
+; CHECK-NEXT:    [[SUM0:%.*]] = add i8 [[S0]], [[S1]]
+; CHECK-NEXT:    [[SUM1:%.*]] = add i8 [[SUM0]], [[S2]]
+; CHECK-NEXT:    [[SUM2:%.*]] = add i8 [[SUM1]], [[S3]]
+; CHECK-NEXT:    [[SUM3:%.*]] = add i8 [[SUM2]], [[S4]]
+; CHECK-NEXT:    [[SUM4:%.*]] = add i8 [[SUM3]], [[S5]]
+; CHECK-NEXT:    [[SUM5:%.*]] = add i8 [[SUM4]], [[S6]]
+; CHECK-NEXT:    [[SUM6:%.*]] = add i8 [[SUM5]], [[S7]]
+; CHECK-NEXT:    ret i8 [[SUM6]]
+;
+entry:
+  %bc = bitcast <2 x i32> splat (i32 1) to <8 x i8>
+  %e0 = extractelement <8 x i8> %bc, i64 0
+  %cond = icmp eq i8 0, 0
+  %e1 = extractelement <8 x i8> %bc, i64 1
+  %e2 = extractelement <8 x i8> %bc, i64 2
+  %e3 = extractelement <8 x i8> %bc, i64 3
+  %e4 = extractelement <8 x i8> %bc, i64 4
+  %e5 = extractelement <8 x i8> %bc, i64 5
+  %e6 = extractelement <8 x i8> %bc, i64 6
+  %e7 = extractelement <8 x i8> %bc, i64 7
+  %s0 = select i1 %cond, i8 %e0, i8 0
+  %s1 = select i1 %cond, i8 %e1, i8 0
+  %s2 = select i1 %cond, i8 %e2, i8 0
+  %s3 = select i1 %cond, i8 %e3, i8 0
+  %s4 = select i1 %cond, i8 %e4, i8 0
+  %s5 = select i1 %cond, i8 %e5, i8 0
+  %s6 = select i1 %cond, i8 %e6, i8 0
+  %s7 = select i1 %cond, i8 %e7, i8 0
+  %sum0 = add i8 %s0, %s1
+  %sum1 = add i8 %sum0, %s2
+  %sum2 = add i8 %sum1, %s3
+  %sum3 = add i8 %sum2, %s4
+  %sum4 = add i8 %sum3, %s5
+  %sum5 = add i8 %sum4, %s6
+  %sum6 = add i8 %sum5, %s7
+  ret i8 %sum6
+}

>From daac43d2f039264875015099adcdcfbb0b99ee17 Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Wed, 21 Jan 2026 07:46:23 -0700
Subject: [PATCH 2/9] [VectorCombine] Fix crash when folding select of bitcast

Fixes #177144. Nits appreciated.
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 33 ++++++++++++++-
 .../fold-selects-from-bitcast.ll              | 41 ++++++++-----------
 2 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e6b21fb2f31c3..197a578e0338e 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1652,7 +1652,38 @@ bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
     }
 
     // Create the vector select and bitcast once for this condition.
-    Builder.SetInsertPoint(BC->getNextNode());
+    // Insert in a block that dominates all selects and after any local defs.
+    BasicBlock *InsertBB = Selects.front()->getParent();
+    for (SelectInst *Sel : Selects)
+      InsertBB = DT.findNearestCommonDominator(InsertBB, Sel->getParent());
+    if (!InsertBB)
+      continue;
+
+    BasicBlock::iterator InsertPt = InsertBB->getFirstInsertionPt();
+    if (InsertPt == InsertBB->end())
+      continue;
+
+    auto ProcessDef = [&](Value *V) -> bool {
+      auto *DefI = dyn_cast<Instruction>(V);
+      if (!DefI || DefI->getParent() != InsertBB)
+        return true;
+
+      auto AfterDefOpt = DefI->getInsertionPointAfterDef();
+      if (!AfterDefOpt)
+        return false;
+
+      BasicBlock::iterator AfterDefIt = *AfterDefOpt;
+      Instruction &AfterDef = *AfterDefIt;
+      if (InsertPt->comesBefore(&AfterDef))
+        InsertPt = AfterDefIt;
+
+      return true;
+    };
+
+    if (!ProcessDef(Cond) || !ProcessDef(SrcVec))
+      continue;
+
+    Builder.SetInsertPoint(InsertBB, InsertPt);
     Value *VecSel =
         Builder.CreateSelect(Cond, SrcVec, Constant::getNullValue(SrcVecTy));
     Value *NewBC = Builder.CreateBitCast(VecSel, DstVecTy);
diff --git a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
index 17f58c8717a2b..a99748b4b6cc9 100644
--- a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
@@ -4,31 +4,24 @@
 define i8 @fold_selects_from_bitcast_dominance() {
 ; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> splat (i32 1) to <8 x i8>
-; CHECK-NEXT:    [[E0:%.*]] = extractelement <8 x i8> [[BC]], i64 0
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 0, 0
-; CHECK-NEXT:    [[E1:%.*]] = extractelement <8 x i8> [[BC]], i64 1
-; CHECK-NEXT:    [[E2:%.*]] = extractelement <8 x i8> [[BC]], i64 2
-; CHECK-NEXT:    [[E3:%.*]] = extractelement <8 x i8> [[BC]], i64 3
-; CHECK-NEXT:    [[E4:%.*]] = extractelement <8 x i8> [[BC]], i64 4
-; CHECK-NEXT:    [[E5:%.*]] = extractelement <8 x i8> [[BC]], i64 5
-; CHECK-NEXT:    [[E6:%.*]] = extractelement <8 x i8> [[BC]], i64 6
-; CHECK-NEXT:    [[E7:%.*]] = extractelement <8 x i8> [[BC]], i64 7
-; CHECK-NEXT:    [[S0:%.*]] = select i1 [[COND]], i8 [[E0]], i8 0
-; CHECK-NEXT:    [[S1:%.*]] = select i1 [[COND]], i8 [[E1]], i8 0
-; CHECK-NEXT:    [[S2:%.*]] = select i1 [[COND]], i8 [[E2]], i8 0
-; CHECK-NEXT:    [[S3:%.*]] = select i1 [[COND]], i8 [[E3]], i8 0
-; CHECK-NEXT:    [[S4:%.*]] = select i1 [[COND]], i8 [[E4]], i8 0
-; CHECK-NEXT:    [[S5:%.*]] = select i1 [[COND]], i8 [[E5]], i8 0
-; CHECK-NEXT:    [[S6:%.*]] = select i1 [[COND]], i8 [[E6]], i8 0
-; CHECK-NEXT:    [[S7:%.*]] = select i1 [[COND]], i8 [[E7]], i8 0
-; CHECK-NEXT:    [[SUM0:%.*]] = add i8 [[S0]], [[S1]]
-; CHECK-NEXT:    [[SUM1:%.*]] = add i8 [[SUM0]], [[S2]]
-; CHECK-NEXT:    [[SUM2:%.*]] = add i8 [[SUM1]], [[S3]]
-; CHECK-NEXT:    [[SUM3:%.*]] = add i8 [[SUM2]], [[S4]]
-; CHECK-NEXT:    [[SUM4:%.*]] = add i8 [[SUM3]], [[S5]]
-; CHECK-NEXT:    [[SUM5:%.*]] = add i8 [[SUM4]], [[S6]]
-; CHECK-NEXT:    [[SUM6:%.*]] = add i8 [[SUM5]], [[S7]]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[COND]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <8 x i8> [[TMP1]], [[SHIFT]]
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP3:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP]], [[SHIFT2]]
+; CHECK-NEXT:    [[SHIFT5:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP6:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP3]], [[SHIFT5]]
+; CHECK-NEXT:    [[SHIFT8:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP9:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP6]], [[SHIFT8]]
+; CHECK-NEXT:    [[SHIFT11:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP12:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP9]], [[SHIFT11]]
+; CHECK-NEXT:    [[SHIFT14:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP15:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP12]], [[SHIFT14]]
+; CHECK-NEXT:    [[SHIFT17:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP18:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP15]], [[SHIFT17]]
+; CHECK-NEXT:    [[SUM6:%.*]] = extractelement <8 x i8> [[FOLDEXTEXTBINOP18]], i64 0
 ; CHECK-NEXT:    ret i8 [[SUM6]]
 ;
 entry:

>From e1a441c2a5f7a056b5cea67b3d9b50f666700e08 Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Wed, 21 Jan 2026 08:39:36 -0700
Subject: [PATCH 3/9] Update AMDGPU test

---
 .../Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
index 8867387e1fad0..ce681d0e939e1 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
@@ -539,9 +539,9 @@ define amdgpu_kernel void @combine_with_extract_other_uses(
 ; CHECK-OPT-LABEL: define amdgpu_kernel void @combine_with_extract_other_uses(
 ; CHECK-OPT-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT2:%.*]], <2 x i32> [[SRC:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
 ; CHECK-OPT-NEXT:  [[ENTRY:.*:]]
-; CHECK-OPT-NEXT:    [[BYTES:%.*]] = bitcast <2 x i32> [[SRC]] to <8 x i8>
 ; CHECK-OPT-NEXT:    [[TMP8:%.*]] = select i1 [[COND]], <2 x i32> [[SRC]], <2 x i32> zeroinitializer
 ; CHECK-OPT-NEXT:    [[COMBINED_BC:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
+; CHECK-OPT-NEXT:    [[BYTES:%.*]] = bitcast <2 x i32> [[SRC]] to <8 x i8>
 ; CHECK-OPT-NEXT:    [[E0:%.*]] = extractelement <8 x i8> [[BYTES]], i64 0
 ; CHECK-OPT-NEXT:    [[E1:%.*]] = extractelement <8 x i8> [[BYTES]], i64 1
 ; CHECK-OPT-NEXT:    [[E2:%.*]] = extractelement <8 x i8> [[BYTES]], i64 2

>From abfa57f4133251c13efa1e262d1366489bb308ab Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Wed, 21 Jan 2026 12:39:07 -0700
Subject: [PATCH 4/9] Address nits

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 197a578e0338e..ac00e59b5782f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1652,9 +1652,9 @@ bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
     }
 
     // Create the vector select and bitcast once for this condition.
-    // Insert in a block that dominates all selects and after any local defs.
+    // Insert in a block that dominates all selects, and where Cond and SrcVec are defined.
     BasicBlock *InsertBB = Selects.front()->getParent();
-    for (SelectInst *Sel : Selects)
+    for (SelectInst *Sel : drop_begin(Selects))
       InsertBB = DT.findNearestCommonDominator(InsertBB, Sel->getParent());
     if (!InsertBB)
       continue;
@@ -1673,8 +1673,7 @@ bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
         return false;
 
       BasicBlock::iterator AfterDefIt = *AfterDefOpt;
-      Instruction &AfterDef = *AfterDefIt;
-      if (InsertPt->comesBefore(&AfterDef))
+      if (InsertPt->comesBefore(&*AfterDefIt))
         InsertPt = AfterDefIt;
 
       return true;

>From 5cc17842715f9e6ad288cd895ae62fb9ab45f92e Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Wed, 21 Jan 2026 12:42:11 -0700
Subject: [PATCH 5/9] Improve testing

Added a multi-block test. The test no longer uses a trivially constant-foldable instruction.
---
 .../fold-selects-from-bitcast.ll              | 71 ++++++++++++++-----
 1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
index a99748b4b6cc9..619faea434fd4 100644
--- a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
@@ -1,33 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=vector-combine -S %s | FileCheck %s
 
-define i8 @fold_selects_from_bitcast_dominance() {
-; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance() {
+define i8 @fold_selects_from_bitcast_dominance(i8 %0) {
+; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance(
+; CHECK-SAME: i8 [[TMP0:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 0, 0
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[COND]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <8 x i8> [[TMP1]], [[SHIFT]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <8 x i8> [[TMP2]], [[SHIFT]]
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP3:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP]], [[SHIFT2]]
-; CHECK-NEXT:    [[SHIFT5:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHIFT5:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP6:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP3]], [[SHIFT5]]
-; CHECK-NEXT:    [[SHIFT8:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHIFT8:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP9:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP6]], [[SHIFT8]]
-; CHECK-NEXT:    [[SHIFT11:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHIFT11:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP12:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP9]], [[SHIFT11]]
-; CHECK-NEXT:    [[SHIFT14:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHIFT14:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP15:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP12]], [[SHIFT14]]
-; CHECK-NEXT:    [[SHIFT17:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[SHIFT17:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP18:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP15]], [[SHIFT17]]
-; CHECK-NEXT:    [[SUM6:%.*]] = extractelement <8 x i8> [[FOLDEXTEXTBINOP18]], i64 0
-; CHECK-NEXT:    ret i8 [[SUM6]]
+; CHECK-NEXT:    [[SUM7:%.*]] = extractelement <8 x i8> [[FOLDEXTEXTBINOP18]], i64 0
+; CHECK-NEXT:    ret i8 [[SUM7]]
 ;
 entry:
   %bc = bitcast <2 x i32> splat (i32 1) to <8 x i8>
   %e0 = extractelement <8 x i8> %bc, i64 0
-  %cond = icmp eq i8 0, 0
+  %cond = icmp eq i8 %0, 0
   %e1 = extractelement <8 x i8> %bc, i64 1
   %e2 = extractelement <8 x i8> %bc, i64 2
   %e3 = extractelement <8 x i8> %bc, i64 3
@@ -52,3 +53,41 @@ entry:
   %sum6 = add i8 %sum5, %s7
   ret i8 %sum6
 }
+
+define i8 @fold_selects_from_bitcast_dominance_multiblock(i8 %in) {
+; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock(
+; CHECK-SAME: i8 [[IN:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[FIRST:.*]]
+; CHECK:       [[FIRST]]:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[IN]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[TOBOOL_NOT]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
+; CHECK-NEXT:    [[BRCOND:%.*]] = icmp slt i8 [[IN]], 7
+; CHECK-NEXT:    br i1 [[BRCOND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[NARROW8:%.*]] = extractelement <32 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    ret i8 [[NARROW8]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[NARROW:%.*]] = extractelement <32 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    ret i8 [[NARROW]]
+;
+entry:
+  %0 = bitcast <4 x i64> splat (i64 1) to <32 x i8>
+  br label %first
+
+first:
+  %vecext56 = extractelement <32 x i8> %0, i64 0
+  %vecext36 = extractelement <32 x i8> %0, i64 0
+  %tobool.not = icmp eq i8 %in, 0
+  %brcond = icmp slt i8 %in, 7
+  br i1 %brcond, label %then, label %else
+
+then:
+  %narrow8 = select i1 %tobool.not, i8 %vecext56, i8 0
+  ret i8 %narrow8
+
+else:
+  %narrow = select i1 %tobool.not, i8 %vecext36, i8 0
+  ret i8 %narrow
+}

>From 07ae48581067b9d949062041e48a66a7ea8bda22 Mon Sep 17 00:00:00 2001
From: Mitch Briles <mitchbriles at gmail.com>
Date: Wed, 21 Jan 2026 11:52:52 -0800
Subject: [PATCH 6/9] Format comment

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index ac00e59b5782f..4912db76253e2 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1652,7 +1652,8 @@ bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
     }
 
     // Create the vector select and bitcast once for this condition.
-    // Insert in a block that dominates all selects, and where Cond and SrcVec are defined.
+    // Insert in a block that dominates all selects, and where Cond and SrcVec
+    // are defined.
     BasicBlock *InsertBB = Selects.front()->getParent();
     for (SelectInst *Sel : drop_begin(Selects))
       InsertBB = DT.findNearestCommonDominator(InsertBB, Sel->getParent());

>From 5067d6424f8373b5c3d3927f9b2877d0fd087362 Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Sat, 24 Jan 2026 04:51:53 -0700
Subject: [PATCH 7/9] A cleaner patch

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 35 +++----------------
 1 file changed, 5 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4912db76253e2..f0f7f44d126f4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1652,38 +1652,13 @@ bool VectorCombine::foldSelectsFromBitcast(Instruction &I) {
     }
 
     // Create the vector select and bitcast once for this condition.
-    // Insert in a block that dominates all selects, and where Cond and SrcVec
-    // are defined.
-    BasicBlock *InsertBB = Selects.front()->getParent();
-    for (SelectInst *Sel : drop_begin(Selects))
-      InsertBB = DT.findNearestCommonDominator(InsertBB, Sel->getParent());
-    if (!InsertBB)
-      continue;
-
-    BasicBlock::iterator InsertPt = InsertBB->getFirstInsertionPt();
-    if (InsertPt == InsertBB->end())
-      continue;
-
-    auto ProcessDef = [&](Value *V) -> bool {
-      auto *DefI = dyn_cast<Instruction>(V);
-      if (!DefI || DefI->getParent() != InsertBB)
-        return true;
-
-      auto AfterDefOpt = DefI->getInsertionPointAfterDef();
-      if (!AfterDefOpt)
-        return false;
-
-      BasicBlock::iterator AfterDefIt = *AfterDefOpt;
-      if (InsertPt->comesBefore(&*AfterDefIt))
-        InsertPt = AfterDefIt;
-
-      return true;
-    };
+    Instruction *InsertPt = BC->getNextNode();
 
-    if (!ProcessDef(Cond) || !ProcessDef(SrcVec))
-      continue;
+    if (auto *CondInst = dyn_cast<Instruction>(Cond))
+      if (DT.dominates(BC, CondInst))
+        InsertPt = CondInst->getNextNode();
 
-    Builder.SetInsertPoint(InsertBB, InsertPt);
+    Builder.SetInsertPoint(InsertPt);
     Value *VecSel =
         Builder.CreateSelect(Cond, SrcVec, Constant::getNullValue(SrcVecTy));
     Value *NewBC = Builder.CreateBitCast(VecSel, DstVecTy);

>From 2f02853af6598a27cb54c37444113efaf94d1858 Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Sat, 24 Jan 2026 04:52:11 -0700
Subject: [PATCH 8/9] Update tests

---
 .../AMDGPU/combine-scalar-selects.ll          |   2 +-
 .../fold-selects-from-bitcast.ll              | 131 +++++++++++++++---
 2 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
index ce681d0e939e1..8867387e1fad0 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
@@ -539,9 +539,9 @@ define amdgpu_kernel void @combine_with_extract_other_uses(
 ; CHECK-OPT-LABEL: define amdgpu_kernel void @combine_with_extract_other_uses(
 ; CHECK-OPT-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[OUT2:%.*]], <2 x i32> [[SRC:%.*]], i1 [[COND:%.*]]) #[[ATTR0]] {
 ; CHECK-OPT-NEXT:  [[ENTRY:.*:]]
+; CHECK-OPT-NEXT:    [[BYTES:%.*]] = bitcast <2 x i32> [[SRC]] to <8 x i8>
 ; CHECK-OPT-NEXT:    [[TMP8:%.*]] = select i1 [[COND]], <2 x i32> [[SRC]], <2 x i32> zeroinitializer
 ; CHECK-OPT-NEXT:    [[COMBINED_BC:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
-; CHECK-OPT-NEXT:    [[BYTES:%.*]] = bitcast <2 x i32> [[SRC]] to <8 x i8>
 ; CHECK-OPT-NEXT:    [[E0:%.*]] = extractelement <8 x i8> [[BYTES]], i64 0
 ; CHECK-OPT-NEXT:    [[E1:%.*]] = extractelement <8 x i8> [[BYTES]], i64 1
 ; CHECK-OPT-NEXT:    [[E2:%.*]] = extractelement <8 x i8> [[BYTES]], i64 2
diff --git a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
index 619faea434fd4..7ae3f7d926203 100644
--- a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=vector-combine -S %s | FileCheck %s
 
-define i8 @fold_selects_from_bitcast_dominance(i8 %0) {
+define i8 @fold_selects_from_bitcast_dominance(i8 %0, <2 x i32> %v) {
 ; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance(
-; CHECK-SAME: i8 [[TMP0:%.*]]) {
+; CHECK-SAME: i8 [[TMP0:%.*]], <2 x i32> [[V:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <2 x i32> [[V]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <8 x i8> [[TMP2]], [[SHIFT]]
@@ -22,11 +22,11 @@ define i8 @fold_selects_from_bitcast_dominance(i8 %0) {
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP15:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP12]], [[SHIFT14]]
 ; CHECK-NEXT:    [[SHIFT17:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[FOLDEXTEXTBINOP18:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP15]], [[SHIFT17]]
-; CHECK-NEXT:    [[SUM7:%.*]] = extractelement <8 x i8> [[FOLDEXTEXTBINOP18]], i64 0
-; CHECK-NEXT:    ret i8 [[SUM7]]
+; CHECK-NEXT:    [[SUM6:%.*]] = extractelement <8 x i8> [[FOLDEXTEXTBINOP18]], i64 0
+; CHECK-NEXT:    ret i8 [[SUM6]]
 ;
 entry:
-  %bc = bitcast <2 x i32> splat (i32 1) to <8 x i8>
+  %bc = bitcast <2 x i32> %v to <8 x i8>
   %e0 = extractelement <8 x i8> %bc, i64 0
   %cond = icmp eq i8 %0, 0
   %e1 = extractelement <8 x i8> %bc, i64 1
@@ -54,34 +54,73 @@ entry:
   ret i8 %sum6
 }
 
-define i8 @fold_selects_from_bitcast_dominance_multiblock(i8 %in) {
-; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock(
-; CHECK-SAME: i8 [[IN:%.*]]) {
+define i8 @fold_selects_from_bitcast_dominance_multiblock1(<4 x i32> %src, i8 %a, i1 %flag) {
+; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock1(
+; CHECK-SAME: <4 x i32> [[SRC:%.*]], i8 [[A:%.*]], i1 [[FLAG:%.*]]) {
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <4 x i32> [[SRC]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+; CHECK-NEXT:    br i1 [[FLAG]], label %[[FIRST:.*]], label %[[SECOND:.*]]
+; CHECK:       [[FIRST]]:
+; CHECK-NEXT:    [[S0:%.*]] = extractelement <16 x i8> [[TMP2]], i32 0
+; CHECK-NEXT:    br label %[[END:.*]]
+; CHECK:       [[SECOND]]:
+; CHECK-NEXT:    [[S1:%.*]] = extractelement <16 x i8> [[TMP2]], i32 7
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ [[S0]], %[[FIRST]] ], [ [[S1]], %[[SECOND]] ]
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+  %bc = bitcast <4 x i32> %src to <16 x i8>
+  %cond = icmp eq i8 %a, 0
+  br i1 %flag, label %first, label %second
+
+first:
+  %e0 = extractelement <16 x i8> %bc, i32 0
+  %s0 = select i1 %cond, i8 %e0, i8 0
+  br label %end
+
+second:
+  %e1 = extractelement <16 x i8> %bc, i32 7
+  %s1 = select i1 %cond, i8 %e1, i8 0
+  br label %end
+
+end:
+  %res = phi i8 [%s0, %first], [%s1, %second]
+  ret i8 %res
+}
+
+define i8 @fold_selects_from_bitcast_dominance_multiblock2(<4 x i64> %v, i8 %a, i1 %flag) {
+; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock2(
+; CHECK-SAME: <4 x i64> [[V:%.*]], i8 [[A:%.*]], i1 [[FLAG:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br label %[[FIRST:.*]]
 ; CHECK:       [[FIRST]]:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[IN]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[TOBOOL_NOT]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer
+; CHECK-NEXT:    br label %[[SECOND:.*]]
+; CHECK:       [[SECOND]]:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[TOBOOL_NOT]], <4 x i64> [[V]], <4 x i64> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
-; CHECK-NEXT:    [[BRCOND:%.*]] = icmp slt i8 [[IN]], 7
-; CHECK-NEXT:    br i1 [[BRCOND]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK-NEXT:    br i1 [[FLAG]], label %[[THEN:.*]], label %[[ELSE:.*]]
 ; CHECK:       [[THEN]]:
 ; CHECK-NEXT:    [[NARROW8:%.*]] = extractelement <32 x i8> [[TMP1]], i64 0
 ; CHECK-NEXT:    ret i8 [[NARROW8]]
 ; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[NARROW:%.*]] = extractelement <32 x i8> [[TMP1]], i64 0
+; CHECK-NEXT:    [[NARROW:%.*]] = extractelement <32 x i8> [[TMP1]], i64 7
 ; CHECK-NEXT:    ret i8 [[NARROW]]
 ;
 entry:
-  %0 = bitcast <4 x i64> splat (i64 1) to <32 x i8>
+  %0 = bitcast <4 x i64> %v to <32 x i8>
   br label %first
 
 first:
   %vecext56 = extractelement <32 x i8> %0, i64 0
-  %vecext36 = extractelement <32 x i8> %0, i64 0
-  %tobool.not = icmp eq i8 %in, 0
-  %brcond = icmp slt i8 %in, 7
-  br i1 %brcond, label %then, label %else
+  %vecext36 = extractelement <32 x i8> %0, i64 7
+  br label %second
+
+second:
+  %tobool.not = icmp eq i8 %a, 0
+  br i1 %flag, label %then, label %else
 
 then:
   %narrow8 = select i1 %tobool.not, i8 %vecext56, i8 0
@@ -91,3 +130,57 @@ else:
   %narrow = select i1 %tobool.not, i8 %vecext36, i8 0
   ret i8 %narrow
 }
+
+define i8 @fold_selects_from_bitcast_dominance_multiblock3(<4 x i64> %v, <4 x i64> %v2, i8 %a, i1 %flag1, i1 %cond2) {
+; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock3(
+; CHECK-SAME: <4 x i64> [[V:%.*]], <4 x i64> [[V2:%.*]], i8 [[A:%.*]], i1 [[FLAG1:%.*]], i1 [[COND2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[COND2]], <4 x i64> [[V]], <4 x i64> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
+; CHECK-NEXT:    br label %[[FIRST:.*]]
+; CHECK:       [[FIRST]]:
+; CHECK-NEXT:    br label %[[SECOND:.*]]
+; CHECK:       [[SECOND]]:
+; CHECK-NEXT:    [[COND1:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND1]], <4 x i64> [[V]], <4 x i64> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <32 x i8>
+; CHECK-NEXT:    br i1 [[FLAG1]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <32 x i8> [[TMP3]], [[SHIFT]]
+; CHECK-NEXT:    [[RES12:%.*]] = extractelement <32 x i8> [[FOLDEXTEXTBINOP]], i64 0
+; CHECK-NEXT:    ret i8 [[RES12]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[FOLDEXTEXTBINOP3:%.*]] = add <32 x i8> [[TMP3]], [[SHIFT2]]
+; CHECK-NEXT:    [[RES34:%.*]] = extractelement <32 x i8> [[FOLDEXTEXTBINOP3]], i64 7
+; CHECK-NEXT:    ret i8 [[RES34]]
+;
+entry:
+  %0 = bitcast <4 x i64> %v to <32 x i8>
+  %1 = bitcast <4 x i64> %v to <32 x i8>
+  br label %first
+
+first:
+  %vecext00 = extractelement <32 x i8> %0, i64 0
+  %vecext07 = extractelement <32 x i8> %0, i64 7
+  %vecext11 = extractelement <32 x i8> %1, i64 1
+  %vecext18 = extractelement <32 x i8> %1, i64 8
+  br label %second
+
+second:
+  %cond1 = icmp eq i8 %a, 0
+  br i1 %flag1, label %then, label %else
+
+then:
+  %res1 = select i1 %cond1, i8 %vecext00, i8 0
+  %res2 = select i1 %cond2, i8 %vecext11, i8 0
+  %res12 = add i8 %res1, %res2
+  ret i8 %res12
+
+else:
+  %res3 = select i1 %cond1, i8 %vecext07, i8 0
+  %res4 = select i1 %cond2, i8 %vecext18, i8 0
+  %res34 = add i8 %res3, %res4
+  ret i8 %res34
+}

>From 3eb8a97db8f7b9db44430863c92d7a5cb4dd2a0e Mon Sep 17 00:00:00 2001
From: Mitch <mitchbriles at gmail.com>
Date: Tue, 27 Jan 2026 22:14:51 -0700
Subject: [PATCH 9/9] Update and move tests

---
 .../AMDGPU/combine-scalar-selects.ll          | 351 ++++++++++++++++++
 .../fold-selects-from-bitcast.ll              | 186 ----------
 2 files changed, 351 insertions(+), 186 deletions(-)
 delete mode 100644 llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll

diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
index 8867387e1fad0..3111d74c895e6 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/combine-scalar-selects.ll
@@ -1870,5 +1870,356 @@ entry:
   ret void
 }
 
+define i8 @fold_selects_from_bitcast_dominance_multiblock1(<4 x i32> %src, i8 %a, i1 %flag1, i1 %flag2) {
+; CHECK-OPT-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock1(
+; CHECK-OPT-SAME: <4 x i32> [[SRC:%.*]], i8 [[A:%.*]], i1 [[FLAG1:%.*]], i1 [[FLAG2:%.*]]) #[[ATTR0]] {
+; CHECK-OPT-NEXT:    [[COND:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-OPT-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <4 x i32> [[SRC]], <4 x i32> zeroinitializer
+; CHECK-OPT-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+; CHECK-OPT-NEXT:    br i1 [[FLAG1]], label %[[FIRST:.*]], label %[[TRAMPOLINE:.*]]
+; CHECK-OPT:       [[TRAMPOLINE]]:
+; CHECK-OPT-NEXT:    br i1 [[FLAG2]], label %[[SECOND:.*]], label %[[THIRD:.*]]
+; CHECK-OPT:       [[FIRST]]:
+; CHECK-OPT-NEXT:    [[S00:%.*]] = extractelement <16 x i8> [[TMP2]], i32 0
+; CHECK-OPT-NEXT:    [[S01:%.*]] = extractelement <16 x i8> [[TMP2]], i32 2
+; CHECK-OPT-NEXT:    [[S0:%.*]] = add i8 [[S00]], [[S01]]
+; CHECK-OPT-NEXT:    br label %[[END:.*]]
+; CHECK-OPT:       [[SECOND]]:
+; CHECK-OPT-NEXT:    [[S10:%.*]] = extractelement <16 x i8> [[TMP2]], i32 3
+; CHECK-OPT-NEXT:    [[S11:%.*]] = extractelement <16 x i8> [[TMP2]], i32 4
+; CHECK-OPT-NEXT:    [[S1:%.*]] = add i8 [[S10]], [[S11]]
+; CHECK-OPT-NEXT:    br label %[[END]]
+; CHECK-OPT:       [[THIRD]]:
+; CHECK-OPT-NEXT:    [[S20:%.*]] = extractelement <16 x i8> [[TMP2]], i32 5
+; CHECK-OPT-NEXT:    [[S21:%.*]] = extractelement <16 x i8> [[TMP2]], i32 6
+; CHECK-OPT-NEXT:    [[S2:%.*]] = add i8 [[S20]], [[S21]]
+; CHECK-OPT-NEXT:    br label %[[END]]
+; CHECK-OPT:       [[END]]:
+; CHECK-OPT-NEXT:    [[RES:%.*]] = phi i8 [ [[S0]], %[[FIRST]] ], [ [[S1]], %[[SECOND]] ], [ [[S2]], %[[THIRD]] ]
+; CHECK-OPT-NEXT:    ret i8 [[RES]]
+;
+; CHECK-NOOPT-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock1(
+; CHECK-NOOPT-SAME: <4 x i32> [[SRC:%.*]], i8 [[A:%.*]], i1 [[FLAG1:%.*]], i1 [[FLAG2:%.*]]) #[[ATTR0]] {
+; CHECK-NOOPT-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SRC]] to <16 x i8>
+; CHECK-NOOPT-NEXT:    [[COND:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG1]], label %[[FIRST:.*]], label %[[TRAMPOLINE:.*]]
+; CHECK-NOOPT:       [[TRAMPOLINE]]:
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG2]], label %[[SECOND:.*]], label %[[THIRD:.*]]
+; CHECK-NOOPT:       [[FIRST]]:
+; CHECK-NOOPT-NEXT:    [[E00:%.*]] = extractelement <16 x i8> [[BC]], i32 0
+; CHECK-NOOPT-NEXT:    [[S00:%.*]] = select i1 [[COND]], i8 [[E00]], i8 0
+; CHECK-NOOPT-NEXT:    [[E01:%.*]] = extractelement <16 x i8> [[BC]], i32 2
+; CHECK-NOOPT-NEXT:    [[S01:%.*]] = select i1 [[COND]], i8 [[E01]], i8 0
+; CHECK-NOOPT-NEXT:    [[S0:%.*]] = add i8 [[S00]], [[S01]]
+; CHECK-NOOPT-NEXT:    br label %[[END:.*]]
+; CHECK-NOOPT:       [[SECOND]]:
+; CHECK-NOOPT-NEXT:    [[E10:%.*]] = extractelement <16 x i8> [[BC]], i32 3
+; CHECK-NOOPT-NEXT:    [[S10:%.*]] = select i1 [[COND]], i8 [[E10]], i8 0
+; CHECK-NOOPT-NEXT:    [[E11:%.*]] = extractelement <16 x i8> [[BC]], i32 4
+; CHECK-NOOPT-NEXT:    [[S11:%.*]] = select i1 [[COND]], i8 [[E11]], i8 0
+; CHECK-NOOPT-NEXT:    [[S1:%.*]] = add i8 [[S10]], [[S11]]
+; CHECK-NOOPT-NEXT:    br label %[[END]]
+; CHECK-NOOPT:       [[THIRD]]:
+; CHECK-NOOPT-NEXT:    [[E20:%.*]] = extractelement <16 x i8> [[BC]], i32 5
+; CHECK-NOOPT-NEXT:    [[S20:%.*]] = select i1 [[COND]], i8 [[E20]], i8 0
+; CHECK-NOOPT-NEXT:    [[E21:%.*]] = extractelement <16 x i8> [[BC]], i32 6
+; CHECK-NOOPT-NEXT:    [[S21:%.*]] = select i1 [[COND]], i8 [[E21]], i8 0
+; CHECK-NOOPT-NEXT:    [[S2:%.*]] = add i8 [[S20]], [[S21]]
+; CHECK-NOOPT-NEXT:    br label %[[END]]
+; CHECK-NOOPT:       [[END]]:
+; CHECK-NOOPT-NEXT:    [[RES:%.*]] = phi i8 [ [[S0]], %[[FIRST]] ], [ [[S1]], %[[SECOND]] ], [ [[S2]], %[[THIRD]] ]
+; CHECK-NOOPT-NEXT:    ret i8 [[RES]]
+;
+  %bc = bitcast <4 x i32> %src to <16 x i8>
+  %cond = icmp eq i8 %a, 0
+  br i1 %flag1, label %first, label %trampoline
+
+trampoline:
+  br i1 %flag2, label %second, label %third
+
+first:
+  %e00 = extractelement <16 x i8> %bc, i32 0
+  %s00 = select i1 %cond, i8 %e00, i8 0
+  %e01 = extractelement <16 x i8> %bc, i32 2
+  %s01 = select i1 %cond, i8 %e01, i8 0
+  %s0 = add i8 %s00, %s01
+  br label %end
+
+second:
+  %e10 = extractelement <16 x i8> %bc, i32 3
+  %s10 = select i1 %cond, i8 %e10, i8 0
+  %e11 = extractelement <16 x i8> %bc, i32 4
+  %s11 = select i1 %cond, i8 %e11, i8 0
+  %s1 = add i8 %s10, %s11
+  br label %end
+
+third:
+  %e20 = extractelement <16 x i8> %bc, i32 5
+  %s20 = select i1 %cond, i8 %e20, i8 0
+  %e21 = extractelement <16 x i8> %bc, i32 6
+  %s21 = select i1 %cond, i8 %e21, i8 0
+  %s2 = add i8 %s20, %s21
+  br label %end
+
+end:
+  %res = phi i8 [%s0, %first], [%s1, %second], [%s2, %third]
+  ret i8 %res
+}
+
+define i8 @fold_selects_from_bitcast_dominance_multiblock2(<4 x i64> %v, i8 %a, i1 %flag) {
+; CHECK-OPT-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock2(
+; CHECK-OPT-SAME: <4 x i64> [[V:%.*]], i8 [[A:%.*]], i1 [[FLAG:%.*]]) #[[ATTR0]] {
+; CHECK-OPT-NEXT:  [[ENTRY:.*:]]
+; CHECK-OPT-NEXT:    br label %[[FIRST:.*]]
+; CHECK-OPT:       [[FIRST]]:
+; CHECK-OPT-NEXT:    br label %[[SECOND:.*]]
+; CHECK-OPT:       [[SECOND]]:
+; CHECK-OPT-NEXT:    [[COND:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-OPT-NEXT:    [[TMP0:%.*]] = select i1 [[COND]], <4 x i64> [[V]], <4 x i64> zeroinitializer
+; CHECK-OPT-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
+; CHECK-OPT-NEXT:    br i1 [[FLAG]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK-OPT:       [[THEN]]:
+; CHECK-OPT-NEXT:    [[S1:%.*]] = extractelement <32 x i8> [[TMP1]], i64 0
+; CHECK-OPT-NEXT:    br label %[[COMBINE:.*]]
+; CHECK-OPT:       [[ELSE]]:
+; CHECK-OPT-NEXT:    [[S2:%.*]] = extractelement <32 x i8> [[TMP1]], i64 7
+; CHECK-OPT-NEXT:    br label %[[COMBINE]]
+; CHECK-OPT:       [[COMBINE]]:
+; CHECK-OPT-NEXT:    [[R1:%.*]] = phi i8 [ [[S1]], %[[THEN]] ], [ [[S2]], %[[ELSE]] ]
+; CHECK-OPT-NEXT:    [[S3:%.*]] = extractelement <32 x i8> [[TMP1]], i64 1
+; CHECK-OPT-NEXT:    [[S4:%.*]] = extractelement <32 x i8> [[TMP1]], i64 8
+; CHECK-OPT-NEXT:    [[S5:%.*]] = extractelement <32 x i8> [[TMP1]], i64 9
+; CHECK-OPT-NEXT:    [[R2:%.*]] = add i8 [[R1]], [[S3]]
+; CHECK-OPT-NEXT:    [[R3:%.*]] = add i8 [[R2]], [[S4]]
+; CHECK-OPT-NEXT:    [[R4:%.*]] = add i8 [[R3]], [[S5]]
+; CHECK-OPT-NEXT:    ret i8 [[R4]]
+;
+; CHECK-NOOPT-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock2(
+; CHECK-NOOPT-SAME: <4 x i64> [[V:%.*]], i8 [[A:%.*]], i1 [[FLAG:%.*]]) #[[ATTR0]] {
+; CHECK-NOOPT-NEXT:  [[ENTRY:.*:]]
+; CHECK-NOOPT-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[V]] to <32 x i8>
+; CHECK-NOOPT-NEXT:    br label %[[FIRST:.*]]
+; CHECK-NOOPT:       [[FIRST]]:
+; CHECK-NOOPT-NEXT:    [[VECEXT1:%.*]] = extractelement <32 x i8> [[TMP0]], i64 0
+; CHECK-NOOPT-NEXT:    [[VECEXT2:%.*]] = extractelement <32 x i8> [[TMP0]], i64 7
+; CHECK-NOOPT-NEXT:    [[VECEXT3:%.*]] = extractelement <32 x i8> [[TMP0]], i64 1
+; CHECK-NOOPT-NEXT:    [[VECEXT4:%.*]] = extractelement <32 x i8> [[TMP0]], i64 8
+; CHECK-NOOPT-NEXT:    [[VECEXT5:%.*]] = extractelement <32 x i8> [[TMP0]], i64 9
+; CHECK-NOOPT-NEXT:    br label %[[SECOND:.*]]
+; CHECK-NOOPT:       [[SECOND]]:
+; CHECK-NOOPT-NEXT:    [[COND:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK-NOOPT:       [[THEN]]:
+; CHECK-NOOPT-NEXT:    [[S1:%.*]] = select i1 [[COND]], i8 [[VECEXT1]], i8 0
+; CHECK-NOOPT-NEXT:    br label %[[COMBINE:.*]]
+; CHECK-NOOPT:       [[ELSE]]:
+; CHECK-NOOPT-NEXT:    [[S2:%.*]] = select i1 [[COND]], i8 [[VECEXT2]], i8 0
+; CHECK-NOOPT-NEXT:    br label %[[COMBINE]]
+; CHECK-NOOPT:       [[COMBINE]]:
+; CHECK-NOOPT-NEXT:    [[R1:%.*]] = phi i8 [ [[S1]], %[[THEN]] ], [ [[S2]], %[[ELSE]] ]
+; CHECK-NOOPT-NEXT:    [[S3:%.*]] = select i1 [[COND]], i8 [[VECEXT3]], i8 0
+; CHECK-NOOPT-NEXT:    [[S4:%.*]] = select i1 [[COND]], i8 [[VECEXT4]], i8 0
+; CHECK-NOOPT-NEXT:    [[S5:%.*]] = select i1 [[COND]], i8 [[VECEXT5]], i8 0
+; CHECK-NOOPT-NEXT:    [[R2:%.*]] = add i8 [[R1]], [[S3]]
+; CHECK-NOOPT-NEXT:    [[R3:%.*]] = add i8 [[R2]], [[S4]]
+; CHECK-NOOPT-NEXT:    [[R4:%.*]] = add i8 [[R3]], [[S5]]
+; CHECK-NOOPT-NEXT:    ret i8 [[R4]]
+;
+entry:
+  %0 = bitcast <4 x i64> %v to <32 x i8>
+  br label %first
+
+first:
+  %vecext1 = extractelement <32 x i8> %0, i64 0
+  %vecext2 = extractelement <32 x i8> %0, i64 7
+  %vecext3 = extractelement <32 x i8> %0, i64 1
+  %vecext4 = extractelement <32 x i8> %0, i64 8
+  %vecext5 = extractelement <32 x i8> %0, i64 9
+  br label %second
+
+second:
+  %cond = icmp eq i8 %a, 0
+  br i1 %flag, label %then, label %else
+
+then:
+  %s1 = select i1 %cond, i8 %vecext1, i8 0
+  br label %combine
+
+else:
+  %s2 = select i1 %cond, i8 %vecext2, i8 0
+  br label %combine
+
+combine:
+  %r1 = phi i8 [%s1, %then], [%s2, %else]
+  %s3 = select i1 %cond, i8 %vecext3, i8 0
+  %s4 = select i1 %cond, i8 %vecext4, i8 0
+  %s5 = select i1 %cond, i8 %vecext5, i8 0
+  %r2 = add i8 %r1, %s3
+  %r3 = add i8 %r2, %s4
+  %r4 = add i8 %r3, %s5
+  ret i8 %r4
+}
+
+define i8 @fold_selects_from_bitcast_dominance_multiblock3(<4 x i64> %v1, <4 x i64> %v2, i8 %a, i1 %flag1, i1 %flag2, i1 %flag3, i1 %flag4, i1 %cond2) {
+; CHECK-OPT-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock3(
+; CHECK-OPT-SAME: <4 x i64> [[V1:%.*]], <4 x i64> [[V2:%.*]], i8 [[A:%.*]], i1 [[FLAG1:%.*]], i1 [[FLAG2:%.*]], i1 [[FLAG3:%.*]], i1 [[FLAG4:%.*]], i1 [[COND2:%.*]]) #[[ATTR0]] {
+; CHECK-OPT-NEXT:  [[ENTRY:.*:]]
+; CHECK-OPT-NEXT:    [[TMP0:%.*]] = select i1 [[COND2]], <4 x i64> [[V2]], <4 x i64> zeroinitializer
+; CHECK-OPT-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
+; CHECK-OPT-NEXT:    br label %[[FIRST:.*]]
+; CHECK-OPT:       [[FIRST]]:
+; CHECK-OPT-NEXT:    br label %[[T0:.*]]
+; CHECK-OPT:       [[T0]]:
+; CHECK-OPT-NEXT:    [[COND1:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-OPT-NEXT:    [[TMP2:%.*]] = select i1 [[COND1]], <4 x i64> [[V1]], <4 x i64> zeroinitializer
+; CHECK-OPT-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <32 x i8>
+; CHECK-OPT-NEXT:    br i1 [[FLAG1]], label %[[R0:.*]], label %[[T1:.*]]
+; CHECK-OPT:       [[T1]]:
+; CHECK-OPT-NEXT:    br i1 [[FLAG2]], label %[[R1:.*]], label %[[T2:.*]]
+; CHECK-OPT:       [[T2]]:
+; CHECK-OPT-NEXT:    br i1 [[FLAG3]], label %[[R2:.*]], label %[[T3:.*]]
+; CHECK-OPT:       [[T3]]:
+; CHECK-OPT-NEXT:    br i1 [[FLAG4]], label %[[R3:.*]], label %[[R4:.*]]
+; CHECK-OPT:       [[R0]]:
+; CHECK-OPT-NEXT:    [[RES0:%.*]] = extractelement <32 x i8> [[TMP3]], i64 0
+; CHECK-OPT-NEXT:    [[RES1:%.*]] = extractelement <32 x i8> [[TMP1]], i64 1
+; CHECK-OPT-NEXT:    [[RES01:%.*]] = add i8 [[RES0]], [[RES1]]
+; CHECK-OPT-NEXT:    ret i8 [[RES01]]
+; CHECK-OPT:       [[R1]]:
+; CHECK-OPT-NEXT:    [[RES2:%.*]] = extractelement <32 x i8> [[TMP3]], i64 2
+; CHECK-OPT-NEXT:    [[RES3:%.*]] = extractelement <32 x i8> [[TMP1]], i64 3
+; CHECK-OPT-NEXT:    [[RES23:%.*]] = add i8 [[RES2]], [[RES3]]
+; CHECK-OPT-NEXT:    ret i8 [[RES23]]
+; CHECK-OPT:       [[R2]]:
+; CHECK-OPT-NEXT:    [[RES4:%.*]] = extractelement <32 x i8> [[TMP3]], i64 4
+; CHECK-OPT-NEXT:    [[RES5:%.*]] = extractelement <32 x i8> [[TMP1]], i64 5
+; CHECK-OPT-NEXT:    [[RES45:%.*]] = add i8 [[RES4]], [[RES5]]
+; CHECK-OPT-NEXT:    ret i8 [[RES45]]
+; CHECK-OPT:       [[R3]]:
+; CHECK-OPT-NEXT:    [[RES6:%.*]] = extractelement <32 x i8> [[TMP3]], i64 6
+; CHECK-OPT-NEXT:    [[RES7:%.*]] = extractelement <32 x i8> [[TMP1]], i64 7
+; CHECK-OPT-NEXT:    [[RES67:%.*]] = add i8 [[RES6]], [[RES7]]
+; CHECK-OPT-NEXT:    ret i8 [[RES67]]
+; CHECK-OPT:       [[R4]]:
+; CHECK-OPT-NEXT:    [[RES8:%.*]] = extractelement <32 x i8> [[TMP3]], i64 8
+; CHECK-OPT-NEXT:    [[RES9:%.*]] = extractelement <32 x i8> [[TMP1]], i64 9
+; CHECK-OPT-NEXT:    [[RES89:%.*]] = add i8 [[RES8]], [[RES9]]
+; CHECK-OPT-NEXT:    ret i8 [[RES89]]
+;
+; CHECK-NOOPT-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock3(
+; CHECK-NOOPT-SAME: <4 x i64> [[V1:%.*]], <4 x i64> [[V2:%.*]], i8 [[A:%.*]], i1 [[FLAG1:%.*]], i1 [[FLAG2:%.*]], i1 [[FLAG3:%.*]], i1 [[FLAG4:%.*]], i1 [[COND2:%.*]]) #[[ATTR0]] {
+; CHECK-NOOPT-NEXT:  [[ENTRY:.*:]]
+; CHECK-NOOPT-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[V1]] to <32 x i8>
+; CHECK-NOOPT-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[V2]] to <32 x i8>
+; CHECK-NOOPT-NEXT:    br label %[[FIRST:.*]]
+; CHECK-NOOPT:       [[FIRST]]:
+; CHECK-NOOPT-NEXT:    [[VECEXT00:%.*]] = extractelement <32 x i8> [[TMP0]], i64 0
+; CHECK-NOOPT-NEXT:    [[VECEXT02:%.*]] = extractelement <32 x i8> [[TMP0]], i64 2
+; CHECK-NOOPT-NEXT:    [[VECEXT04:%.*]] = extractelement <32 x i8> [[TMP0]], i64 4
+; CHECK-NOOPT-NEXT:    [[VECEXT06:%.*]] = extractelement <32 x i8> [[TMP0]], i64 6
+; CHECK-NOOPT-NEXT:    [[VECEXT08:%.*]] = extractelement <32 x i8> [[TMP0]], i64 8
+; CHECK-NOOPT-NEXT:    [[VECEXT11:%.*]] = extractelement <32 x i8> [[TMP1]], i64 1
+; CHECK-NOOPT-NEXT:    [[VECEXT13:%.*]] = extractelement <32 x i8> [[TMP1]], i64 3
+; CHECK-NOOPT-NEXT:    [[VECEXT15:%.*]] = extractelement <32 x i8> [[TMP1]], i64 5
+; CHECK-NOOPT-NEXT:    [[VECEXT17:%.*]] = extractelement <32 x i8> [[TMP1]], i64 7
+; CHECK-NOOPT-NEXT:    [[VECEXT19:%.*]] = extractelement <32 x i8> [[TMP1]], i64 9
+; CHECK-NOOPT-NEXT:    br label %[[T0:.*]]
+; CHECK-NOOPT:       [[T0]]:
+; CHECK-NOOPT-NEXT:    [[COND1:%.*]] = icmp eq i8 [[A]], 0
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG1]], label %[[R0:.*]], label %[[T1:.*]]
+; CHECK-NOOPT:       [[T1]]:
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG2]], label %[[R1:.*]], label %[[T2:.*]]
+; CHECK-NOOPT:       [[T2]]:
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG3]], label %[[R2:.*]], label %[[T3:.*]]
+; CHECK-NOOPT:       [[T3]]:
+; CHECK-NOOPT-NEXT:    br i1 [[FLAG4]], label %[[R3:.*]], label %[[R4:.*]]
+; CHECK-NOOPT:       [[R0]]:
+; CHECK-NOOPT-NEXT:    [[RES0:%.*]] = select i1 [[COND1]], i8 [[VECEXT00]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES1:%.*]] = select i1 [[COND2]], i8 [[VECEXT11]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES01:%.*]] = add i8 [[RES0]], [[RES1]]
+; CHECK-NOOPT-NEXT:    ret i8 [[RES01]]
+; CHECK-NOOPT:       [[R1]]:
+; CHECK-NOOPT-NEXT:    [[RES2:%.*]] = select i1 [[COND1]], i8 [[VECEXT02]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES3:%.*]] = select i1 [[COND2]], i8 [[VECEXT13]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES23:%.*]] = add i8 [[RES2]], [[RES3]]
+; CHECK-NOOPT-NEXT:    ret i8 [[RES23]]
+; CHECK-NOOPT:       [[R2]]:
+; CHECK-NOOPT-NEXT:    [[RES4:%.*]] = select i1 [[COND1]], i8 [[VECEXT04]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES5:%.*]] = select i1 [[COND2]], i8 [[VECEXT15]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES45:%.*]] = add i8 [[RES4]], [[RES5]]
+; CHECK-NOOPT-NEXT:    ret i8 [[RES45]]
+; CHECK-NOOPT:       [[R3]]:
+; CHECK-NOOPT-NEXT:    [[RES6:%.*]] = select i1 [[COND1]], i8 [[VECEXT06]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES7:%.*]] = select i1 [[COND2]], i8 [[VECEXT17]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES67:%.*]] = add i8 [[RES6]], [[RES7]]
+; CHECK-NOOPT-NEXT:    ret i8 [[RES67]]
+; CHECK-NOOPT:       [[R4]]:
+; CHECK-NOOPT-NEXT:    [[RES8:%.*]] = select i1 [[COND1]], i8 [[VECEXT08]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES9:%.*]] = select i1 [[COND2]], i8 [[VECEXT19]], i8 0
+; CHECK-NOOPT-NEXT:    [[RES89:%.*]] = add i8 [[RES8]], [[RES9]]
+; CHECK-NOOPT-NEXT:    ret i8 [[RES89]]
+;
+entry:
+  %0 = bitcast <4 x i64> %v1 to <32 x i8>
+  %1 = bitcast <4 x i64> %v2 to <32 x i8>
+  br label %first
+
+first:
+  %vecext00 = extractelement <32 x i8> %0, i64 0
+  %vecext02 = extractelement <32 x i8> %0, i64 2
+  %vecext04 = extractelement <32 x i8> %0, i64 4
+  %vecext06 = extractelement <32 x i8> %0, i64 6
+  %vecext08 = extractelement <32 x i8> %0, i64 8
+  %vecext11 = extractelement <32 x i8> %1, i64 1
+  %vecext13 = extractelement <32 x i8> %1, i64 3
+  %vecext15 = extractelement <32 x i8> %1, i64 5
+  %vecext17 = extractelement <32 x i8> %1, i64 7
+  %vecext19 = extractelement <32 x i8> %1, i64 9
+  br label %t0
+
+t0:
+  %cond1 = icmp eq i8 %a, 0
+  br i1 %flag1, label %r0, label %t1
+
+t1:
+  br i1 %flag2, label %r1, label %t2
+
+t2:
+  br i1 %flag3, label %r2, label %t3
+
+t3:
+  br i1 %flag4, label %r3, label %r4
+
+r0:
+  %res0 = select i1 %cond1, i8 %vecext00, i8 0
+  %res1 = select i1 %cond2, i8 %vecext11, i8 0
+  %res01 = add i8 %res0, %res1
+  ret i8 %res01
+
+r1:
+  %res2 = select i1 %cond1, i8 %vecext02, i8 0
+  %res3 = select i1 %cond2, i8 %vecext13, i8 0
+  %res23 = add i8 %res2, %res3
+  ret i8 %res23
+
+r2:
+  %res4 = select i1 %cond1, i8 %vecext04, i8 0
+  %res5 = select i1 %cond2, i8 %vecext15, i8 0
+  %res45 = add i8 %res4, %res5
+  ret i8 %res45
+
+r3:
+  %res6 = select i1 %cond1, i8 %vecext06, i8 0
+  %res7 = select i1 %cond2, i8 %vecext17, i8 0
+  %res67 = add i8 %res6, %res7
+  ret i8 %res67
+
+r4:
+  %res8 = select i1 %cond1, i8 %vecext08, i8 0
+  %res9 = select i1 %cond2, i8 %vecext19, i8 0
+  %res89 = add i8 %res8, %res9
+  ret i8 %res89
+}
+
 declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg)
 
diff --git a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll b/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
deleted file mode 100644
index 7ae3f7d926203..0000000000000
--- a/llvm/test/Transforms/VectorCombine/fold-selects-from-bitcast.ll
+++ /dev/null
@@ -1,186 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
-; RUN: opt -passes=vector-combine -S %s | FileCheck %s
-
-define i8 @fold_selects_from_bitcast_dominance(i8 %0, <2 x i32> %v) {
-; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance(
-; CHECK-SAME: i8 [[TMP0:%.*]], <2 x i32> [[V:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <2 x i32> [[V]], <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <8 x i8> [[TMP2]], [[SHIFT]]
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP3:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP]], [[SHIFT2]]
-; CHECK-NEXT:    [[SHIFT5:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP6:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP3]], [[SHIFT5]]
-; CHECK-NEXT:    [[SHIFT8:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP9:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP6]], [[SHIFT8]]
-; CHECK-NEXT:    [[SHIFT11:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP12:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP9]], [[SHIFT11]]
-; CHECK-NEXT:    [[SHIFT14:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP15:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP12]], [[SHIFT14]]
-; CHECK-NEXT:    [[SHIFT17:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> <i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP18:%.*]] = add <8 x i8> [[FOLDEXTEXTBINOP15]], [[SHIFT17]]
-; CHECK-NEXT:    [[SUM6:%.*]] = extractelement <8 x i8> [[FOLDEXTEXTBINOP18]], i64 0
-; CHECK-NEXT:    ret i8 [[SUM6]]
-;
-entry:
-  %bc = bitcast <2 x i32> %v to <8 x i8>
-  %e0 = extractelement <8 x i8> %bc, i64 0
-  %cond = icmp eq i8 %0, 0
-  %e1 = extractelement <8 x i8> %bc, i64 1
-  %e2 = extractelement <8 x i8> %bc, i64 2
-  %e3 = extractelement <8 x i8> %bc, i64 3
-  %e4 = extractelement <8 x i8> %bc, i64 4
-  %e5 = extractelement <8 x i8> %bc, i64 5
-  %e6 = extractelement <8 x i8> %bc, i64 6
-  %e7 = extractelement <8 x i8> %bc, i64 7
-  %s0 = select i1 %cond, i8 %e0, i8 0
-  %s1 = select i1 %cond, i8 %e1, i8 0
-  %s2 = select i1 %cond, i8 %e2, i8 0
-  %s3 = select i1 %cond, i8 %e3, i8 0
-  %s4 = select i1 %cond, i8 %e4, i8 0
-  %s5 = select i1 %cond, i8 %e5, i8 0
-  %s6 = select i1 %cond, i8 %e6, i8 0
-  %s7 = select i1 %cond, i8 %e7, i8 0
-  %sum0 = add i8 %s0, %s1
-  %sum1 = add i8 %sum0, %s2
-  %sum2 = add i8 %sum1, %s3
-  %sum3 = add i8 %sum2, %s4
-  %sum4 = add i8 %sum3, %s5
-  %sum5 = add i8 %sum4, %s6
-  %sum6 = add i8 %sum5, %s7
-  ret i8 %sum6
-}
-
-define i8 @fold_selects_from_bitcast_dominance_multiblock1(<4 x i32> %src, i8 %a, i1 %flag) {
-; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock1(
-; CHECK-SAME: <4 x i32> [[SRC:%.*]], i8 [[A:%.*]], i1 [[FLAG:%.*]]) {
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], <4 x i32> [[SRC]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-; CHECK-NEXT:    br i1 [[FLAG]], label %[[FIRST:.*]], label %[[SECOND:.*]]
-; CHECK:       [[FIRST]]:
-; CHECK-NEXT:    [[S0:%.*]] = extractelement <16 x i8> [[TMP2]], i32 0
-; CHECK-NEXT:    br label %[[END:.*]]
-; CHECK:       [[SECOND]]:
-; CHECK-NEXT:    [[S1:%.*]] = extractelement <16 x i8> [[TMP2]], i32 7
-; CHECK-NEXT:    br label %[[END]]
-; CHECK:       [[END]]:
-; CHECK-NEXT:    [[RES:%.*]] = phi i8 [ [[S0]], %[[FIRST]] ], [ [[S1]], %[[SECOND]] ]
-; CHECK-NEXT:    ret i8 [[RES]]
-;
-  %bc = bitcast <4 x i32> %src to <16 x i8>
-  %cond = icmp eq i8 %a, 0
-  br i1 %flag, label %first, label %second
-
-first:
-  %e0 = extractelement <16 x i8> %bc, i32 0
-  %s0 = select i1 %cond, i8 %e0, i8 0
-  br label %end
-
-second:
-  %e1 = extractelement <16 x i8> %bc, i32 7
-  %s1 = select i1 %cond, i8 %e1, i8 0
-  br label %end
-
-end:
-  %res = phi i8 [%s0, %first], [%s1, %second]
-  ret i8 %res
-}
-
-define i8 @fold_selects_from_bitcast_dominance_multiblock2(<4 x i64> %v, i8 %a, i1 %flag) {
-; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock2(
-; CHECK-SAME: <4 x i64> [[V:%.*]], i8 [[A:%.*]], i1 [[FLAG:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    br label %[[FIRST:.*]]
-; CHECK:       [[FIRST]]:
-; CHECK-NEXT:    br label %[[SECOND:.*]]
-; CHECK:       [[SECOND]]:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[TOBOOL_NOT]], <4 x i64> [[V]], <4 x i64> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
-; CHECK-NEXT:    br i1 [[FLAG]], label %[[THEN:.*]], label %[[ELSE:.*]]
-; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[NARROW8:%.*]] = extractelement <32 x i8> [[TMP1]], i64 0
-; CHECK-NEXT:    ret i8 [[NARROW8]]
-; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[NARROW:%.*]] = extractelement <32 x i8> [[TMP1]], i64 7
-; CHECK-NEXT:    ret i8 [[NARROW]]
-;
-entry:
-  %0 = bitcast <4 x i64> %v to <32 x i8>
-  br label %first
-
-first:
-  %vecext56 = extractelement <32 x i8> %0, i64 0
-  %vecext36 = extractelement <32 x i8> %0, i64 7
-  br label %second
-
-second:
-  %tobool.not = icmp eq i8 %a, 0
-  br i1 %flag, label %then, label %else
-
-then:
-  %narrow8 = select i1 %tobool.not, i8 %vecext56, i8 0
-  ret i8 %narrow8
-
-else:
-  %narrow = select i1 %tobool.not, i8 %vecext36, i8 0
-  ret i8 %narrow
-}
-
-define i8 @fold_selects_from_bitcast_dominance_multiblock3(<4 x i64> %v, <4 x i64> %v2, i8 %a, i1 %flag1, i1 %cond2) {
-; CHECK-LABEL: define i8 @fold_selects_from_bitcast_dominance_multiblock3(
-; CHECK-SAME: <4 x i64> [[V:%.*]], <4 x i64> [[V2:%.*]], i8 [[A:%.*]], i1 [[FLAG1:%.*]], i1 [[COND2:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[TMP0:%.*]] = select i1 [[COND2]], <4 x i64> [[V]], <4 x i64> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[TMP0]] to <32 x i8>
-; CHECK-NEXT:    br label %[[FIRST:.*]]
-; CHECK:       [[FIRST]]:
-; CHECK-NEXT:    br label %[[SECOND:.*]]
-; CHECK:       [[SECOND]]:
-; CHECK-NEXT:    [[COND1:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND1]], <4 x i64> [[V]], <4 x i64> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <32 x i8>
-; CHECK-NEXT:    br i1 [[FLAG1]], label %[[THEN:.*]], label %[[ELSE:.*]]
-; CHECK:       [[THEN]]:
-; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP:%.*]] = add <32 x i8> [[TMP3]], [[SHIFT]]
-; CHECK-NEXT:    [[RES12:%.*]] = extractelement <32 x i8> [[FOLDEXTEXTBINOP]], i64 0
-; CHECK-NEXT:    ret i8 [[RES12]]
-; CHECK:       [[ELSE]]:
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT:    [[FOLDEXTEXTBINOP3:%.*]] = add <32 x i8> [[TMP3]], [[SHIFT2]]
-; CHECK-NEXT:    [[RES34:%.*]] = extractelement <32 x i8> [[FOLDEXTEXTBINOP3]], i64 7
-; CHECK-NEXT:    ret i8 [[RES34]]
-;
-entry:
-  %0 = bitcast <4 x i64> %v to <32 x i8>
-  %1 = bitcast <4 x i64> %v to <32 x i8>
-  br label %first
-
-first:
-  %vecext00 = extractelement <32 x i8> %0, i64 0
-  %vecext07 = extractelement <32 x i8> %0, i64 7
-  %vecext11 = extractelement <32 x i8> %1, i64 1
-  %vecext18 = extractelement <32 x i8> %1, i64 8
-  br label %second
-
-second:
-  %cond1 = icmp eq i8 %a, 0
-  br i1 %flag1, label %then, label %else
-
-then:
-  %res1 = select i1 %cond1, i8 %vecext00, i8 0
-  %res2 = select i1 %cond2, i8 %vecext11, i8 0
-  %res12 = add i8 %res1, %res2
-  ret i8 %res12
-
-else:
-  %res3 = select i1 %cond1, i8 %vecext07, i8 0
-  %res4 = select i1 %cond2, i8 %vecext18, i8 0
-  %res34 = add i8 %res3, %res4
-  ret i8 %res34
-}



More information about the llvm-commits mailing list