[llvm] [VectorCombine] Fold chain of (scalar load)->ext->ext to load->ext. (PR #141109)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri May 23 06:17:37 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/141109

>From eb5c8a67ecce1ae5aae63d82e9bde22ae99820e5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 20 May 2025 12:40:11 +0100
Subject: [PATCH 1/3] [VectorCombine] Fold chain of (scalar load)->ext->ext to
 load->ext.

Add a new combine that folds a chain of (scalar load)->ext->ext (with
shuffles/casts/inserts in between) to a single vector load and wide
extend.

This makes the IR simpler to analyze and process, while the backend can
still decide to split the wide load and extend up again. Such chains
come from code written with vector intrinsics. Some real-world examples
can be found in https://github.com/ARM-software/astc-encoder/.
---
 .../Transforms/Vectorize/VectorCombine.cpp    | 51 +++++++++++++++
 .../AArch64/combine-shuffle-ext.ll            | 64 +++++--------------
 2 files changed, 67 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe1d930f295ce..3de60adcd4b2f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -127,6 +127,7 @@ class VectorCombine {
   bool foldShuffleOfShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
   bool foldShuffleToIdentity(Instruction &I);
+  bool foldShuffleExtExtracts(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
   bool foldCastFromReductions(Instruction &I);
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
@@ -2777,6 +2778,55 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
   return true;
 }
 
+bool VectorCombine::foldShuffleExtExtracts(Instruction &I) {
+  // Try to fold vector zero- and sign-extends split across multiple operations
+  // into a single extend, removing redundant inserts and shuffles.
+
+  // Check if we have an extended shuffle that selects the first vector, which
+  // itself is another extend fed by a load.
+  Instruction *L;
+  if (!match(
+          &I,
+          m_OneUse(m_Shuffle(
+              m_OneUse(m_ZExtOrSExt(m_OneUse(m_BitCast(m_OneUse(m_InsertElt(
+                  m_Value(), m_OneUse(m_Instruction(L)), m_SpecificInt(0))))))),
+              m_Value()))) ||
+      !cast<ShuffleVectorInst>(&I)->isIdentityWithExtract() ||
+      !isa<LoadInst>(L))
+    return false;
+  auto *InnerExt = cast<Instruction>(I.getOperand(0));
+  auto *OuterExt = dyn_cast<Instruction>(*I.user_begin());
+  if (!isa<SExtInst, ZExtInst>(OuterExt))
+    return false;
+
+  // If the inner extend is a sign extend and the outer one isn't (i.e. a
+  // zero-extend), don't fold. If the first one is zero-extend, it doesn't
+  // matter if the second one is a sign- or zero-extend.
+  if (isa<SExtInst>(InnerExt) && !isa<SExtInst>(OuterExt))
+    return false;
+
+  // Don't try to convert the load if it has an odd size.
+  if (!DL->typeSizeEqualsStoreSize(L->getType()))
+    return false;
+  auto *DstTy = cast<FixedVectorType>(OuterExt->getType());
+  auto *SrcTy =
+      FixedVectorType::get(InnerExt->getOperand(0)->getType()->getScalarType(),
+                           DstTy->getNumElements());
+  if (DL->getTypeStoreSize(SrcTy) != DL->getTypeStoreSize(L->getType()))
+    return false;
+
+  // Convert to a vector load feeding a single wide extend.
+  Builder.SetInsertPoint(*L->getInsertionPointAfterDef());
+  auto *NewLoad = cast<LoadInst>(
+      Builder.CreateLoad(SrcTy, L->getOperand(0), L->getName() + ".vec"));
+  auto *NewExt = isa<ZExtInst>(InnerExt) ? Builder.CreateZExt(NewLoad, DstTy)
+                                         : Builder.CreateSExt(NewLoad, DstTy);
+  OuterExt->replaceAllUsesWith(NewExt);
+  replaceValue(*OuterExt, *NewExt);
+  Worklist.pushValue(NewLoad);
+  return true;
+}
+
 /// Given a commutative reduction, the order of the input lanes does not alter
 /// the results. We can use this to remove certain shuffles feeding the
 /// reduction, removing the need to shuffle at all.
@@ -3551,6 +3601,7 @@ bool VectorCombine::run() {
         break;
       case Instruction::ShuffleVector:
         MadeChange |= widenSubvectorLoad(I);
+        MadeChange |= foldShuffleExtExtracts(I);
         break;
       default:
         break;
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
index 6341c8945247d..2d6d80ad57fb5 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
@@ -11,12 +11,8 @@ define <4 x i32> @load_i32_zext_to_v4i32(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -33,12 +29,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -121,13 +113,9 @@ define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_clobber_after_load(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    call void @use.i32(i32 0)
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -287,12 +275,8 @@ define <8 x i32> @load_i64_zext_to_v8i32(ptr %di) {
 ; CHECK-LABEL: define <8 x i32> @load_i64_zext_to_v8i32(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i64, ptr [[DI]], align 8
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
-; CHECK-NEXT:    [[EXT_1:%.*]] = zext <16 x i8> [[VEC_BC]] to <16 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[OUTER_EXT:%.*]] = zext nneg <8 x i16> [[VEC_SHUFFLE]] to <8 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <8 x i8>, ptr [[DI]], align 8
+; CHECK-NEXT:    [[OUTER_EXT:%.*]] = zext <8 x i8> [[L_VEC]] to <8 x i32>
 ; CHECK-NEXT:    ret <8 x i32> [[OUTER_EXT]]
 ;
 entry:
@@ -309,12 +293,8 @@ define <3 x i32> @load_i24_zext_to_v3i32(ptr %di) {
 ; CHECK-LABEL: define <3 x i32> @load_i24_zext_to_v3i32(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i24, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
-; CHECK-NEXT:    [[EXT_1:%.*]] = zext <6 x i8> [[VEC_BC]] to <6 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <3 x i16> [[VEC_SHUFFLE]] to <3 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <3 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext <3 x i8> [[L_VEC]] to <3 x i32>
 ; CHECK-NEXT:    ret <3 x i32> [[EXT_2]]
 ;
 entry:
@@ -419,12 +399,8 @@ define <4 x i32> @load_i32_sext_to_v4i32(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_sext_to_v4i32(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = sext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = sext <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -441,12 +417,8 @@ define <8 x i32> @load_i64_sext_to_v8i32(ptr %di) {
 ; CHECK-LABEL: define <8 x i32> @load_i64_sext_to_v8i32(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i64, ptr [[DI]], align 8
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i64> [[VEC_INS]] to <16 x i8>
-; CHECK-NEXT:    [[EXT_1:%.*]] = sext <16 x i8> [[VEC_BC]] to <16 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <16 x i16> [[EXT_1]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[OUTER_EXT:%.*]] = sext <8 x i16> [[VEC_SHUFFLE]] to <8 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <8 x i8>, ptr [[DI]], align 8
+; CHECK-NEXT:    [[OUTER_EXT:%.*]] = sext <8 x i8> [[L_VEC]] to <8 x i32>
 ; CHECK-NEXT:    ret <8 x i32> [[OUTER_EXT]]
 ;
 entry:
@@ -463,12 +435,8 @@ define <3 x i32> @load_i24_sext_to_v3i32(ptr %di) {
 ; CHECK-LABEL: define <3 x i32> @load_i24_sext_to_v3i32(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i24, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i24> <i24 poison, i24 0>, i24 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i24> [[VEC_INS]] to <6 x i8>
-; CHECK-NEXT:    [[EXT_1:%.*]] = sext <6 x i8> [[VEC_BC]] to <6 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <6 x i16> [[EXT_1]], <6 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
-; CHECK-NEXT:    [[EXT_2:%.*]] = sext <3 x i16> [[VEC_SHUFFLE]] to <3 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <3 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = sext <3 x i8> [[L_VEC]] to <3 x i32>
 ; CHECK-NEXT:    ret <3 x i32> [[EXT_2]]
 ;
 entry:

>From 5d5fe9b6085c7c875ebf70f68bca0628c81881b4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 23 May 2025 14:03:11 +0100
Subject: [PATCH 2/3] !fixup address comments, thanks

---
 .../Transforms/Vectorize/VectorCombine.cpp    |  6 ++---
 .../AArch64/combine-shuffle-ext.ll            | 26 +++++--------------
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 3de60adcd4b2f..b0839f907951c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2795,7 +2795,7 @@ bool VectorCombine::foldShuffleExtExtracts(Instruction &I) {
       !isa<LoadInst>(L))
     return false;
   auto *InnerExt = cast<Instruction>(I.getOperand(0));
-  auto *OuterExt = dyn_cast<Instruction>(*I.user_begin());
+  auto *OuterExt = cast<Instruction>(*I.user_begin());
   if (!isa<SExtInst, ZExtInst>(OuterExt))
     return false;
 
@@ -2819,8 +2819,8 @@ bool VectorCombine::foldShuffleExtExtracts(Instruction &I) {
   Builder.SetInsertPoint(*L->getInsertionPointAfterDef());
   auto *NewLoad = cast<LoadInst>(
       Builder.CreateLoad(SrcTy, L->getOperand(0), L->getName() + ".vec"));
-  auto *NewExt = isa<ZExtInst>(InnerExt) ? Builder.CreateZExt(NewLoad, DstTy)
-                                         : Builder.CreateSExt(NewLoad, DstTy);
+  auto *NewExt = isa<ZExtInst>(InnerExt) ? Builder.CreateZExt(NewLoad, DstTy, "vec.ext", InnerExt->hasNonNeg())
+                                         : Builder.CreateSExt(NewLoad, DstTy, "vec.ext");
   OuterExt->replaceAllUsesWith(NewExt);
   replaceValue(*OuterExt, *NewExt);
   Worklist.pushValue(NewLoad);
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
index 2d6d80ad57fb5..55a38d8a5307c 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/combine-shuffle-ext.ll
@@ -30,7 +30,7 @@ define <4 x i32> @load_i32_zext_to_v4i32_both_nneg(ptr %di) {
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -47,12 +47,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -69,12 +65,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_outer_nneg(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = zext <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:
@@ -91,12 +83,8 @@ define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(ptr %di) {
 ; CHECK-LABEL: define <4 x i32> @load_i32_zext_to_v4i32_inner_nneg_outer_sext(
 ; CHECK-SAME: ptr [[DI:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[DI]], align 4
-; CHECK-NEXT:    [[VEC_INS:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[L]], i64 0
-; CHECK-NEXT:    [[VEC_BC:%.*]] = bitcast <2 x i32> [[VEC_INS]] to <8 x i8>
-; CHECK-NEXT:    [[E_1:%.*]] = zext nneg <8 x i8> [[VEC_BC]] to <8 x i16>
-; CHECK-NEXT:    [[VEC_SHUFFLE:%.*]] = shufflevector <8 x i16> [[E_1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[EXT_2:%.*]] = sext <4 x i16> [[VEC_SHUFFLE]] to <4 x i32>
+; CHECK-NEXT:    [[L_VEC:%.*]] = load <4 x i8>, ptr [[DI]], align 4
+; CHECK-NEXT:    [[EXT_2:%.*]] = zext nneg <4 x i8> [[L_VEC]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[EXT_2]]
 ;
 entry:

>From 23da6b204cfbde97274da40ab2eaa71838729e72 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 23 May 2025 14:17:18 +0100
Subject: [PATCH 3/3] !fixup fix formatting

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b0839f907951c..2e9480c83678f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -2819,8 +2819,10 @@ bool VectorCombine::foldShuffleExtExtracts(Instruction &I) {
   Builder.SetInsertPoint(*L->getInsertionPointAfterDef());
   auto *NewLoad = cast<LoadInst>(
       Builder.CreateLoad(SrcTy, L->getOperand(0), L->getName() + ".vec"));
-  auto *NewExt = isa<ZExtInst>(InnerExt) ? Builder.CreateZExt(NewLoad, DstTy, "vec.ext", InnerExt->hasNonNeg())
-                                         : Builder.CreateSExt(NewLoad, DstTy, "vec.ext");
+  auto *NewExt =
+      isa<ZExtInst>(InnerExt)
+          ? Builder.CreateZExt(NewLoad, DstTy, "vec.ext", InnerExt->hasNonNeg())
+          : Builder.CreateSExt(NewLoad, DstTy, "vec.ext");
   OuterExt->replaceAllUsesWith(NewExt);
   replaceValue(*OuterExt, *NewExt);
   Worklist.pushValue(NewLoad);



More information about the llvm-commits mailing list