[llvm] [InstCombine] Fold shuffled intrinsic operands with constant operands (PR #141300)

Fri May 23 15:53:54 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Luke Lau (lukel97)

<details>
<summary>Changes</summary>

We currently pull shuffles through binops and intrinsics, which is an important canonical form for VectorCombine to be able to scalarize vector sequences. But while binops can be folded with a constant operand, intrinsics currently require all operands to be shufflevectors.

This extends intrinsic folding to be in line with regular binops by reusing the constant "unshuffling" logic.

As far as I can tell the list of currently folded intrinsics don't require any special UB handling.

This change in combination with #138095 and #137823 fixes the following C:

```c
void max(int *x, int *y, int n) {
  for (int i = 0; i < n; i++)
    x[i] += *y > 42 ? *y : 42;
}
```

Not using the splatted vector form on RISC-V with `-O3 -march=rva23u64`:

```asm
	vmv.s.x	v8, a4
	li	a4, 42
	vmax.vx	v10, v8, a4
	vrgather.vi	v8, v10, 0
.LBB0_9:                                # %vector.body
                                        # =>This Inner Loop Header: Depth=1
	vl2re32.v	v10, (a5)
	vadd.vv	v10, v10, v8
	vs2r.v	v10, (a5)
```

i.e., it now generates

```asm
        li	a6, 42
        max	a6, a4, a6
.LBB0_9:                                # %vector.body
                                        # =>This Inner Loop Header: Depth=1
	vl2re32.v	v8, (a5)
	vadd.vx	v8, v8, a6
	vs2r.v	v8, (a5)
```

Stacked on #141287 

---

Patch is 21.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141300.diff


6 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+19-10) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+3) 
- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+49-61) 
- (modified) llvm/test/Transforms/InstCombine/fma.ll (+61) 
- (modified) llvm/test/Transforms/InstCombine/fsh.ll (+61) 
- (modified) llvm/test/Transforms/InstCombine/minmax-intrinsics.ll (+33) 


``````````diff

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 12e08c09ea67d..f1ff8180cde23 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1399,9 +1399,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
 
 /// If all arguments of the intrinsic are unary shuffles with the same mask,
 /// try to shuffle after the intrinsic.
-static Instruction *
-foldShuffledIntrinsicOperands(IntrinsicInst *II,
-                              InstCombiner::BuilderTy &Builder) {
+Instruction *
+InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
   // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
   //       etc. Use llvm::isTriviallyVectorizable() and related to determine
   //       which intrinsics are safe to shuffle?
@@ -1419,9 +1418,11 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   }
 
   Value *X;
+  Constant *C;
   ArrayRef<int> Mask;
-  if (!match(II->getArgOperand(0),
-             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+  auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
+  if (!NonConstArg ||
+      !match(NonConstArg, m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
     return nullptr;
 
   // At least 1 operand must have 1 use because we are creating 2 instructions.
@@ -1433,11 +1434,19 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
   NewArgs[0] = X;
   Type *SrcTy = X->getType();
   for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
-    if (!match(II->getArgOperand(i),
-               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
-        X->getType() != SrcTy)
+    if (match(II->getArgOperand(i),
+              m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
+        X->getType() == SrcTy)
+      NewArgs[i] = X;
+    else if (match(II->getArgOperand(i), m_ImmConstant(C))) {
+      // If it's a constant, try find the constant that would be shuffled to C.
+      if (Constant *ShuffledC =
+              unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
+        NewArgs[i] = ShuffledC;
+      else
+        return nullptr;
+    } else
       return nullptr;
-    NewArgs[i] = X;
   }
 
   // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
@@ -3849,7 +3858,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         if (Instruction *R = FoldOpIntoSelect(*II, Sel))
           return R;
 
-  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
 
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8b657b3f8555c..5e0cd17fb1924 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -147,6 +147,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
   Instruction *foldItoFPtoI(CastInst &FI);
   Instruction *visitSelectInst(SelectInst &SI);
+  Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
   Instruction *visitCallInst(CallInst &CI);
   Instruction *visitInvokeInst(InvokeInst &II);
   Instruction *visitCallBrInst(CallBrInst &CBI);
@@ -604,6 +605,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *foldVectorBinop(BinaryOperator &Inst);
   Instruction *foldVectorSelect(SelectInst &Sel);
   Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
+  Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                              VectorType *NewCTy);
 
   /// Given a binary operator, cast instruction, or select which has a PHI node
   /// as operand #0, see if we can fold the instruction into the PHI (which is
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c1608b1866a5d..2e0becbe9f1f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2094,6 +2094,50 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
   return true;
 }
 
+// Find constant NewC that has property:
+//   shuffle(NewC, ShMask) = C
+// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
+//
+// A 1-to-1 mapping is not required. Example:
+// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
+Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+                                              VectorType *NewCTy) {
+  if (isa<ScalableVectorType>(NewCTy)) {
+    Constant *Splat = C->getSplatValue();
+    if (!Splat)
+      return nullptr;
+    return ConstantVector::getSplat(
+        cast<VectorType>(C->getType())->getElementCount(), Splat);
+  }
+
+  if (cast<FixedVectorType>(NewCTy)->getNumElements() >
+      cast<FixedVectorType>(C->getType())->getNumElements())
+    return nullptr;
+
+  unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
+  PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
+  SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
+  unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+  for (unsigned I = 0; I < NumElts; ++I) {
+    Constant *CElt = C->getAggregateElement(I);
+    if (ShMask[I] >= 0) {
+      assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
+      Constant *NewCElt = NewVecC[ShMask[I]];
+      // Bail out if:
+      // 1. The constant vector contains a constant expression.
+      // 2. The shuffle needs an element of the constant vector that can't
+      //    be mapped to a new constant vector.
+      // 3. This is a widening shuffle that copies elements of V1 into the
+      //    extended elements (extending with poison is allowed).
+      if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
+          I >= NewCNumElts)
+        return nullptr;
+      NewVecC[ShMask[I]] = CElt;
+    }
+  }
+  return ConstantVector::get(NewVecC);
+}
+
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   if (!isa<VectorType>(Inst.getType()))
     return nullptr;
@@ -2213,50 +2257,15 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   // other binops, so they can be folded. It may also enable demanded elements
   // transforms.
   Constant *C;
-  auto *InstVTy = dyn_cast<FixedVectorType>(Inst.getType());
-  if (InstVTy &&
-      match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
+  if (match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
                                                 m_Mask(Mask))),
-                             m_ImmConstant(C))) &&
-      cast<FixedVectorType>(V1->getType())->getNumElements() <=
-          InstVTy->getNumElements()) {
-    assert(InstVTy->getScalarType() == V1->getType()->getScalarType() &&
+                             m_ImmConstant(C)))) {
+    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
            "Shuffle should not change scalar type");
 
-    // Find constant NewC that has property:
-    //   shuffle(NewC, ShMask) = C
-    // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
-    // reorder is not possible. A 1-to-1 mapping is not required. Example:
-    // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
     bool ConstOp1 = isa<Constant>(RHS);
-    ArrayRef<int> ShMask = Mask;
-    unsigned SrcVecNumElts =
-        cast<FixedVectorType>(V1->getType())->getNumElements();
-    PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
-    SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar);
-    bool MayChange = true;
-    unsigned NumElts = InstVTy->getNumElements();
-    for (unsigned I = 0; I < NumElts; ++I) {
-      Constant *CElt = C->getAggregateElement(I);
-      if (ShMask[I] >= 0) {
-        assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
-        Constant *NewCElt = NewVecC[ShMask[I]];
-        // Bail out if:
-        // 1. The constant vector contains a constant expression.
-        // 2. The shuffle needs an element of the constant vector that can't
-        //    be mapped to a new constant vector.
-        // 3. This is a widening shuffle that copies elements of V1 into the
-        //    extended elements (extending with poison is allowed).
-        if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
-            I >= SrcVecNumElts) {
-          MayChange = false;
-          break;
-        }
-        NewVecC[ShMask[I]] = CElt;
-      }
-    }
-    if (MayChange) {
-      Constant *NewC = ConstantVector::get(NewVecC);
+    if (Constant *NewC =
+            unshuffleConstant(Mask, C, cast<VectorType>(V1->getType()))) {
       // It may not be safe to execute a binop on a vector with poison elements
       // because the entire instruction can be folded to undef or create poison
       // that did not exist in the original code.
@@ -2272,27 +2281,6 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
     }
   }
 
-  // Similar to the combine above, but handles the case for scalable vectors
-  // where both shuffle(V1, 0) and C are splats.
-  //
-  // Op(shuffle(V1, 0), (splat C)) -> shuffle(Op(V1, (splat C)), 0)
-  if (isa<ScalableVectorType>(Inst.getType()) &&
-      match(&Inst, m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Poison(),
-                                                m_ZeroMask())),
-                             m_ImmConstant(C)))) {
-    if (Constant *Splat = C->getSplatValue()) {
-      bool ConstOp1 = isa<Constant>(RHS);
-      VectorType *V1Ty = cast<VectorType>(V1->getType());
-      Constant *NewC = ConstantVector::getSplat(V1Ty->getElementCount(), Splat);
-
-      Value *NewLHS = ConstOp1 ? V1 : NewC;
-      Value *NewRHS = ConstOp1 ? NewC : V1;
-      VectorType *VTy = cast<VectorType>(Inst.getType());
-      SmallVector<int> Mask(VTy->getElementCount().getKnownMinValue(), 0);
-      return createBinOpShuffle(NewLHS, NewRHS, Mask);
-    }
-  }
-
   // Try to reassociate to sink a splat shuffle after a binary operation.
   if (Inst.isAssociative() && Inst.isCommutative()) {
     // Canonicalize shuffle operand as LHS.
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index ae0067d41426c..86a67c996b4d6 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -802,6 +802,67 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
   ret <2 x float> %r
 }
 
+define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x float> [[B]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> %a, <2 x float> %b)
+  ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X1:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x float> [[A]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 1.0, float 2.0>, <2 x float> %a)
+  ret <2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B:%.*]])
+; CHECK-NEXT:    [[R1:%.*]] = shufflevector <vscale x 2 x float> [[R]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[R1]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[X1:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x float> [[A]]
+;
+  %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %r
+}
+
+define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    call void @use_vec3(<3 x float> [[A]])
+; CHECK-NEXT:    [[Y:%.*]] = call fast <2 x float> @llvm.fma.v2f32(<2 x float> [[X]], <2 x float> splat (float 4.200000e+01), <2 x float> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x float> [[B]]
+;
+  %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  call void @use_vec3(<3 x float> %a)
+  %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call fast <3 x float> @llvm.fma(<3 x float> splat (float 42.0), <3 x float> %a, <3 x float> %b)
+  ret <3 x float> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x float> @fma_unary_shuffle_ops_unshuffled(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 862853f992968..398117b5c1c5c 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -930,6 +930,67 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
   ret <2 x i31> %r
 }
 
+define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[B]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> %a, <2 x i32> %b)
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X1:%.*]], <2 x i32> <i32 2, i32 1>, <2 x i32> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> %a)
+  ret <2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT:    [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[X]], <vscale x 2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[B]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+  ret <vscale x 2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT:    [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> [[X1:%.*]], <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1]])
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i32> [[A]]
+;
+  %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
+  ret <vscale x 2 x i32> %r
+}
+
+define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT:    [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    call void @use_v3(<3 x i32> [[A]])
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X]], <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT:    [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT:    ret <3 x i32> [[B]]
+;
+  %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  call void @use_v3(<3 x i32> %a)
+  %b = shufflevector <2 x i32> %y, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+  %r = call <3 x i32> @llvm.fshr(<3 x i32> splat (i32 42), <3 x i32> %a, <3 x i32> %b)
+  ret <3 x i32> %r
+}
+
 ; negative test - must have 3 shuffles
 
 define <2 x i32> @fsh_unary_shuffle_ops_unshuffled(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 9a8608da9fd5b..85f2a1ccb3a3d 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2416,6 +2416,39 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
   ret <3 x i8> %r
 }
 
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X1:%.*]], <3 x i8> <i8 1, i8 0, i8 2>)
+; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+; CHECK-NEXT:    ret <3 x i8> [[SX]]
+;
+  %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/141300