[llvm] [InstCombine] Fold shuffled intrinsic operands with constant operands (PR #141300)

Fri May 23 15:53:20 PDT 2025

https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/141300

We currently pull shuffles through binops and intrinsics, which is an important canonical form for VectorCombine to be able to scalarize vector sequences. But while binops can be folded with a constant operand, intrinsics currently require all operands to be shufflevectors.

This extends intrinsic folding to be in line with regular binops by reusing the constant "unshuffling" logic.

As far as I can tell the list of currently folded intrinsics don't require any special UB handling.

This change in combination with #138095 and #137823 fixes the following C:

```c
void max(int *x, int *y, int n) {
  for (int i = 0; i < n; i++)
    x[i] += *y > 42 ? *y : 42;
}
```

Not using the splatted vector form on RISC-V with `-O3 -march=rva23u64`:

```asm
	vmv.s.x	v8, a4
	li	a4, 42
	vmax.vx	v10, v8, a4
	vrgather.vi	v8, v10, 0
.LBB0_9:                                # %vector.body
                                        # =>This Inner Loop Header: Depth=1
	vl2re32.v	v10, (a5)
	vadd.vv	v10, v10, v8
	vs2r.v	v10, (a5)
```

i.e., it now generates

```asm
        li	a6, 42
        max	a6, a4, a6
.LBB0_9:                                # %vector.body
                                        # =>This Inner Loop Header: Depth=1
	vl2re32.v	v8, (a5)
	vadd.vx	v8, v8, a6
	vs2r.v	v8, (a5)
```

Stacked on #141287 

>From 689b76298568f70857391e26d5f05aa9df88c1d1 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 23 May 2025 20:37:14 +0100
Subject: [PATCH 1/3] [InstCombine] Refactor fixed and scalable binop shuffle
 combine. NFCI

This extracts the logic that works out the "unshuffled" constant when pulling shuffle vectors out of binary ops, so the same combine can be generic over fixed and scalable vectors.

The plan is to reuse this helper to do the same canonicalization on intrinsics too.
---
 .../InstCombine/InstructionCombining.cpp      | 110 ++++++++----------
 1 file changed, 49 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c1608b1866a5d..eb5352524853f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2094,6 +2094,50 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
   return true;
 }
 
+// Find constant NewC that has property:
+//   shuffle(NewC, ShMask) = C
+// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
+//
+// A 1-to-1 mapping is not required. Example:
+// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
+static Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                                   VectorType *NewCTy) {
+  if (isa<ScalableVectorType>(NewCTy)) {
+    Constant *Splat = C->getSplatValue();
+    if (!Splat)
+      return nullptr;
+    return ConstantVector::getSplat(
+        cast<VectorType>(C->getType())->getElementCount(), Splat);
+  }
+
+  if (cast<FixedVectorType>(NewCTy)->getNumElements() >
+      cast<FixedVectorType>(C->getType())->getNumElements())
+    return nullptr;
+
+  unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
+  PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
+  SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
+  unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+  for (unsigned I = 0; I < NumElts; ++I) {
+    Constant *CElt = C->getAggregateElement(I);
+    if (ShMask[I] >= 0) {
+      assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
+      Constant *NewCElt = NewVecC[ShMask[I]];
+      // Bail out if:
+      // 1. The constant vector contains a constant expression.
+      // 2. The shuffle needs an element of the constant vector that can't
+      //    be mapped to a new constant vector.
+      // 3. This is a widening shuffle that copies elements of V1 into the
+      //    extended elements (extending with poison is allowed).
+      if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
+          I >= NewCNumElts)
+        return nullptr;
+      NewVecC[ShMask[I]] = CElt;
+    }
+  }
+  return ConstantVector::get(NewVecC);
+}
+
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   if (!isa<VectorType>(Inst.getType()))
     return nullptr;
@@ -2213,50 +2257,15 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   // other binops, so they can be folded. It may also enable demanded elements
   // transforms.
   Constant *C;
-  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
-  if (InstVTy &&
-      match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
+  if (match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
                                                 m_Mask(Mask))),
-                             m_ImmConstant(C))) &&
-      cast<FixedVectorType>(V1->getType())->getNumElements() <=
-          InstVTy->getNumElements()) {
-    assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
+                             m_ImmConstant(C)))) {
+    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
            "Shuffle should not change scalar type");
 
-    // Find constant NewC that has property:
-    //   shuffle(NewC, ShMask) = C
-    // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
-    // reorder is not possible. A 1-to-1 mapping is not required. Example:
-    // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
     bool ConstOp1 = isa<Constant>(RHS);
-    ArrayRef<int> ShMask = Mask;
-    unsigned SrcVecNumElts =
-        cast<FixedVectorType>(V1->getType())->getNumElements();
-    PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
-    SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
-    bool MayChange = true;
-    unsigned NumElts = InstVTy->getNumElements();
-    for (unsigned I = 0; I < NumElts; ++I) {
-      Constant *CElt = C->getAggregateElement(I);
-      if (ShMask[I] >= 0) {
-        assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
-        Constant *NewCElt = NewVecC[ShMask[I]];
-        // Bail out if:
-        // 1. The constant vector contains a constant expression.
-        // 2. The shuffle needs an element of the constant vector that can't
-        //    be mapped to a new constant vector.
-        // 3. This is a widening shuffle that copies elements of V1 into the
-        //    extended elements (extending with poison is allowed).
-        if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
-            I >= SrcVecNumElts) {
-          MayChange = false;
-          break;
-        }
-        NewVecC[ShMask[I]] = CElt;
-      }
-    }
-    if (MayChange) {
-      Constant *NewC = ConstantVector::get(NewVecC);
+    if (Constant *NewC =
+            unshuffleConstant(Mask, C, cast<VectorType>(V1->getType()))) {
       // It may not be safe to execute a binop on a vector with poison elements
       // because the entire instruction can be folded to undef or create poison
       // that did not exist in the original code.
@@ -2272,27 +2281,6 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
     }
   }
 
-  // Similar to the combine above, but handles the case for scalable vectors
-  // where both shuffle(V1, 0) and C are splats.
-  //
-  // Op(shuffle(V1, 0), (splat C)) -> shuffle(Op(V1, (splat C)), 0)
-  if (isa<ScalableVectorType>(Inst.getType()) &&
-      match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
-                                                m_ZeroMask())),
-                             m_ImmConstant(C)))) {
-    if (Constant *Splat = C->getSplatValue()) {
-      bool ConstOp1 = isa<Constant>(RHS);
-      VectorType *V1Ty = cast<VectorType>(V1->getType());
-      Constant *NewC = ConstantVector::getSplat(V1Ty->getElementCount(), Splat);
-
-      Value *NewLHS = ConstOp1 ? V1 : NewC;
-      Value *NewRHS = ConstOp1 ? NewC : V1;
-      VectorType *VTy = cast<VectorType>(Inst.getType());
-      SmallVector<int> Mask(VTy->getElementCount().getKnownMinValue(), 0);
-      return createBinOpShuffle(NewLHS, NewRHS, Mask);
-    }
-  }
-
   // Try to reassociate to sink a splat shuffle after a binary operation.
   if (Inst.isAssociative() && Inst.isCommutative()) {
     // Canonicalize shuffle operand as LHS.

>From 24e82ca410a78c6d45d9105eb9df594380737a59 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 23 May 2025 22:46:34 +0100
Subject: [PATCH 2/3] Precommit tests

---
 llvm/test/Transforms/InstCombine/fma.ll       | 63 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/fsh.ll       | 63 +++++++++++++++++++
 .../InstCombine/minmax-intrinsics.ll          | 33 ++++++++++
 3 files changed, 159 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index ae0067d41426c..1a8ce85e9f506 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -802,6 +802,69 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
   ret <2 x float> %r
 }
 
+define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[B]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> %a, <2 x float> %b)
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[A]])
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 1.0, float 2.0>, <2 x float> %a)
+  ret <2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x float> [[Y:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[A]])
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %r
+}
+
+define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = call fast <3 x float> @llvm.fma.v3f32(<3 x float> [[A]], <3 x float> splat (float 4.200000e+01), <3 x float> [[B]])
+; CHECK-NEXT:    ret <3 x float> [[R]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  call void @use_vec3(<3 x float> %a)
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call fast <3 x float> @llvm.fma(<3 x float> splat (float 42.0), <3 x float> %a, <3 x float> %b)
+  ret <3 x float> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x float> @fma_unary_shuffle_ops_unshuffled(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 862853f992968..d8f0f439b37df 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -930,6 +930,69 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
   ret <2 x i31> %r
 }
 
+define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> [[A]], <2 x i32> [[B]])
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> [[A]])
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> %a)
+  ret <2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]], <vscale x 2 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]])
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
+  ret <vscale x 2 x i32> %r
+}
+
+define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> splat (i32 42), <3 x i32> [[A]], <3 x i32> [[B]])
+; CHECK-NEXT:    ret <3 x i32> [[R]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  call void @use_v3(<3 x i32> %a)
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call <3 x i32> @llvm.fshr(<3 x i32> splat (i32 42), <3 x i32> %a, <3 x i32> %b)
+  ret <3 x i32> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x i32> @fsh_unary_shuffle_ops_unshuffled(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 9a8608da9fd5b..4e38370cf4e58 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2416,6 +2416,39 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
   ret <3 x i8> %r
 }
 
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
+; CHECK-NEXT:    ret <3 x i8> [[R]]
+;
+  %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+  %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
+  ret <3 x i8> %r
+}
+
+define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_scalable(
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <vscale x 3 x i8> [[X:%.*]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX]], <vscale x 3 x i8> splat (i8 42))
+; CHECK-NEXT:    ret <vscale x 3 x i8> [[R]]
+;
+  %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> splat (i8 42), <vscale x 3 x i8> %sx)
+  ret <vscale x 3 x i8> %r
+}
+
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_widening(
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
+; CHECK-NEXT:    ret <3 x i8> [[R]]
+;
+  %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
+  ret <3 x i8> %r
+}
+
 ; negative test - must have 2 shuffles
 
 define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {

>From 87187198aad1386108ec419616161b1d99abae31 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 23 May 2025 23:22:31 +0100
Subject: [PATCH 3/3] [InstCombine] Fold shuffled intrinsic operands with
 constants

---
 .../InstCombine/InstCombineCalls.cpp          | 29 +++++++++++------
 .../InstCombine/InstCombineInternal.h         |  3 ++
 .../InstCombine/InstructionCombining.cpp      |  4 +--
 llvm/test/Transforms/InstCombine/fma.ll       | 32 +++++++++----------
 llvm/test/Transforms/InstCombine/fsh.ll       | 32 +++++++++----------
 .../InstCombine/minmax-intrinsics.ll          | 18 +++++------
 6 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 12e08c09ea67d..f1ff8180cde23 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1399,9 +1399,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
 
 /// If all arguments of the intrinsic are unary shuffles with the same mask,
 /// try to shuffle after the intrinsic.
-static Instruction *
-foldShuffledIntrinsicOperands(IntrinsicInst *II,
-                              InstCombiner::BuilderTy &Builder) {
+Instruction *
+InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
   //       etc. Use llvm::isTriviallyVectorizable() and related to determine
   //       which intrinsics are safe to shuffle?
@@ -1419,9 +1418,11 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   }
 
   Value *X;
+  Constant *C;
   ArrayRef<int> Mask;
-  if (!match(II->getArgOperand(0),
-             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+  auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
+  if (!NonConstArg ||
+      !match(NonConstArg, m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
     return nullptr;
 
   // At least 1 operand must have 1 use because we are creating 2 instructions.
@@ -1433,11 +1434,19 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   NewArgs[0] = X;
   Type *SrcTy = X->getType();
   for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
-    if (!match(II->getArgOperand(i),
-               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
-        X->getType() != SrcTy)
+    if (match(II->getArgOperand(i),
+              m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
+        X->getType() == SrcTy)
+      NewArgs[i] = X;
+    else if (match(II->getArgOperand(i), m_ImmConstant(C))) {
+      // If it's a constant, try find the constant that would be shuffled to C.
+      if (Constant *ShuffledC =
+              unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
+        NewArgs[i] = ShuffledC;
+      else
+        return nullptr;
+    } else
       return nullptr;
-    NewArgs[i] = X;
   }
 
   // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
@@ -3849,7 +3858,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         if (Instruction *R = FoldOpIntoSelect(*II, Sel))
           return R;
 
-  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8b657b3f8555c..5e0cd17fb1924 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -147,6 +147,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
+  Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);
@@ -604,6 +605,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldVectorBinop(BinaryOperator &Inst);
   Instruction *foldVectorSelect(SelectInst &Sel);
   Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
+  Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                              VectorType *NewCTy);
 
   /// Given a binary operator, cast instruction, or select which has a PHI node
   /// as operand #0, see if we can fold the instruction into the PHI (which is
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index eb5352524853f..2e0becbe9f1f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2100,8 +2100,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
 //
 // A 1-to-1 mapping is not required. Example:
 // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
-static Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
-                                   VectorType *NewCTy) {
+Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                                              VectorType *NewCTy) {
   if (isa<ScalableVectorType>(NewCTy)) {
     Constant *Splat = C->getSplatValue();
     if (!Splat)
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index 1a8ce85e9f506..86a67c996b4d6 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -804,10 +804,9 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
 
 define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[A]], <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[B]])
-; CHECK-NEXT:    ret <2 x float> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x float> [[B]]
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
   %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
@@ -817,9 +816,9 @@ define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y
 
 define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> [[A]])
-; CHECK-NEXT:    ret <2 x float> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X1:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x float> [[A]]
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
   %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 1.0, float 2.0>, <2 x float> %a)
@@ -828,10 +827,9 @@ define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
 
 define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x float> [[Y:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B:%.*]])
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 2 x float> [[R]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R1]]
 ;
   %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
@@ -841,9 +839,9 @@ define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2
 
 define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
 ; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X:%.*]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[A]])
-; CHECK-NEXT:    ret <vscale x 2 x float> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[X1:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[A]]
 ;
   %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
   %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
@@ -854,9 +852,9 @@ define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x
 ; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
 ; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = call fast <3 x float> @llvm.fma.v3f32(<3 x float> [[A]], <3 x float> splat (float 4.200000e+01), <3 x float> [[B]])
-; CHECK-NEXT:    ret <3 x float> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call fast <2 x float> @llvm.fma.v2f32(<2 x float> [[X]], <2 x float> splat (float 4.200000e+01), <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x float> [[B]]
 ;
   %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   call void @use_vec3(<3 x float> %a)
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index d8f0f439b37df..398117b5c1c5c 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -932,10 +932,9 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
 
 define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> [[A]], <2 x i32> [[B]])
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
   %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
@@ -945,9 +944,9 @@ define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> [[A]])
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X1:%.*]], <2 x i32> <i32 2, i32 1>, <2 x i32> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
   %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
   %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> %a)
@@ -956,10 +955,9 @@ define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
 
 define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]], <vscale x 2 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[X]], <vscale x 2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[B]]
 ;
   %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -969,9 +967,9 @@ define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x
 
 define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
-; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[A]])
-; CHECK-NEXT:    ret <vscale x 2 x i32> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X1:%.*]], <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[A]]
 ;
   %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
   %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
@@ -982,9 +980,9 @@ define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32>
 ; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
 ; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
 ; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
-; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> splat (i32 42), <3 x i32> [[A]], <3 x i32> [[B]])
-; CHECK-NEXT:    ret <3 x i32> [[R]]
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x i32> [[B]]
 ;
   %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   call void @use_v3(<3 x i32> %a)
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 4e38370cf4e58..85f2a1ccb3a3d 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2418,9 +2418,9 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
 
 define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
-; CHECK-NEXT:    ret <3 x i8> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X1:%.*]], <3 x i8> <i8 1, i8 0, i8 2>)
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
 ;
   %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
   %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)
@@ -2429,9 +2429,9 @@ define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
 
 define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3 x i8> %x) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_scalable(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <vscale x 3 x i8> [[X:%.*]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX]], <vscale x 3 x i8> splat (i8 42))
-; CHECK-NEXT:    ret <vscale x 3 x i8> [[R]]
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX:%.*]], <vscale x 3 x i8> splat (i8 42))
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 3 x i8> [[R]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 3 x i8> [[R1]]
 ;
   %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
   %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> splat (i8 42), <vscale x 3 x i8> %sx)
@@ -2440,9 +2440,9 @@ define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3
 
 define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_widening(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 0, i8 1, i8 2>)
-; CHECK-NEXT:    ret <3 x i8> [[R]]
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X1:%.*]], <2 x i8> <i8 1, i8 0>)
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
 ;
   %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
   %r = call <3 x i8> @llvm.smax(<3 x i8> <i8 0, i8 1, i8 2>, <3 x i8> %sx)