[llvm] 9262e37 - [InstCombine] Fold shuffled intrinsic operands with constant operands (#141300)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 02:57:12 PDT 2025
Author: Luke Lau
Date: 2025-05-28T10:57:08+01:00
New Revision: 9262e37d8c2d77fd86a1b4c183ac3ab6fe076d30
URL: https://github.com/llvm/llvm-project/commit/9262e37d8c2d77fd86a1b4c183ac3ab6fe076d30
DIFF: https://github.com/llvm/llvm-project/commit/9262e37d8c2d77fd86a1b4c183ac3ab6fe076d30.diff
LOG: [InstCombine] Fold shuffled intrinsic operands with constant operands (#141300)
We currently pull shuffles through binops and intrinsics, which is an
important canonical form for VectorCombine to be able to scalarize
vector sequences. But while binops can be folded with a constant
operand, intrinsics currently require all operands to be shufflevectors.
This extends intrinsic folding to be in line with regular binops by
reusing the constant "unshuffling" logic.
As far as I can tell the list of currently folded intrinsics don't
require any special UB handling.
This change in combination with #138095 and #137823 fixes the following
C:
```c
void max(int *x, int *y, int n) {
for (int i = 0; i < n; i++)
x[i] += *y > 42 ? *y : 42;
}
```
Not using the splatted vector form on RISC-V with `-O3 -march=rva23u64`:
```asm
vmv.s.x v8, a4
li a4, 42
vmax.vx v10, v8, a4
vrgather.vi v8, v10, 0
.LBB0_9: # %vector.body
# =>This Inner Loop Header: Depth=1
vl2re32.v v10, (a5)
vadd.vv v10, v10, v8
vs2r.v v10, (a5)
```
i.e., it now generates
```asm
li a6, 42
max a6, a4, a6
.LBB0_9: # %vector.body
# =>This Inner Loop Header: Depth=1
vl2re32.v v8, (a5)
vadd.vx v8, v8, a6
vs2r.v v8, (a5)
```
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/lib/Transforms/InstCombine/InstCombineInternal.h
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/fma.ll
llvm/test/Transforms/InstCombine/fsh.ll
llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 12e08c09ea67d..e01dafd36d30d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1399,9 +1399,8 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
-static Instruction *
-foldShuffledIntrinsicOperands(IntrinsicInst *II,
- InstCombiner::BuilderTy &Builder) {
+Instruction *
+InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
// etc. Use llvm::isTriviallyVectorizable() and related to determine
// which intrinsics are safe to shuffle?
@@ -1419,9 +1418,11 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
}
Value *X;
+ Constant *C;
ArrayRef<int> Mask;
- if (!match(II->getArgOperand(0),
- m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+ auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
+ if (!NonConstArg ||
+ !match(NonConstArg, m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
return nullptr;
// At least 1 operand must have 1 use because we are creating 2 instructions.
@@ -1429,15 +1430,21 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
return nullptr;
// See if all arguments are shuffled with the same mask.
- SmallVector<Value *, 4> NewArgs(II->arg_size());
- NewArgs[0] = X;
+ SmallVector<Value *, 4> NewArgs;
Type *SrcTy = X->getType();
- for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
- if (!match(II->getArgOperand(i),
- m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
- X->getType() != SrcTy)
+ for (Value *Arg : II->args()) {
+ if (match(Arg, m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) &&
+ X->getType() == SrcTy)
+ NewArgs.push_back(X);
+ else if (match(Arg, m_ImmConstant(C))) {
+ // If it's a constant, try find the constant that would be shuffled to C.
+ if (Constant *ShuffledC =
+ unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
+ NewArgs.push_back(ShuffledC);
+ else
+ return nullptr;
+ } else
return nullptr;
- NewArgs[i] = X;
}
// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
@@ -3849,7 +3856,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
- if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+ if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
return Shuf;
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 8b657b3f8555c..5e0cd17fb1924 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -147,6 +147,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
Instruction *foldItoFPtoI(CastInst &FI);
Instruction *visitSelectInst(SelectInst &SI);
+ Instruction *foldShuffledIntrinsicOperands(IntrinsicInst *II);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
Instruction *visitCallBrInst(CallBrInst &CBI);
@@ -604,6 +605,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldVectorBinop(BinaryOperator &Inst);
Instruction *foldVectorSelect(SelectInst &Sel);
Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
+ Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+ VectorType *NewCTy);
/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9ddcef0396e39..3dc89772676df 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2100,8 +2100,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
///
/// A 1-to-1 mapping is not required. Example:
/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
-static Constant *unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
- VectorType *NewCTy) {
+Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
+ VectorType *NewCTy) {
if (isa<ScalableVectorType>(NewCTy)) {
Constant *Splat = C->getSplatValue();
if (!Splat)
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index ae0067d41426c..f0d4f776a5d90 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -802,6 +802,67 @@ define <2 x float> @fma_unary_shuffle_ops_narrowing(<3 x float> %x, <3 x float>
ret <2 x float> %r
}
+define <2 x float> @fma_unary_shuffle_ops_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const(
+; CHECK-NEXT: [[Y:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[Y1:%.*]])
+; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[B]]
+;
+ %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x float> @llvm.fma(<2 x float> %a, <2 x float> <float 1.0, float 2.0>, <2 x float> %b)
+ ret <2 x float> %r
+}
+
+define <2 x float> @fma_unary_shuffle_ops_2_const(<2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const(
+; CHECK-NEXT: [[X:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> <float 2.000000e+00, float 1.000000e+00>, <2 x float> [[X1:%.*]])
+; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x float> [[A]]
+;
+ %a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x float> @llvm.fma(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 1.0, float 2.0>, <2 x float> %a)
+ ret <2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_1_const_scalable(<vscale x 2 x float> %x, <vscale x 2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> [[A:%.*]], <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[B:%.*]])
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <vscale x 2 x float> [[R]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 2 x float> [[R1]]
+;
+ %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ %b = shufflevector <vscale x 2 x float> %y, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> %a, <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %b)
+ ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @fma_unary_shuffle_ops_2_const_scalable(<vscale x 2 x float> %x) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> splat (float 4.200000e+01), <vscale x 2 x float> [[X1:%.*]])
+; CHECK-NEXT: [[A:%.*]] = shufflevector <vscale x 2 x float> [[X]], <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 2 x float> [[A]]
+;
+ %a = shufflevector <vscale x 2 x float> %x, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ %r = call <vscale x 2 x float> @llvm.fma(<vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> splat (float 42.0), <vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %r
+}
+
+define <3 x float> @fma_unary_shuffle_ops_widening_1_const(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: @fma_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT: call void @use_vec3(<3 x float> [[A]])
+; CHECK-NEXT: [[Y:%.*]] = call fast <2 x float> @llvm.fma.v2f32(<2 x float> [[X]], <2 x float> splat (float 4.200000e+01), <2 x float> [[Y1:%.*]])
+; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x float> [[Y]], <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT: ret <3 x float> [[B]]
+;
+ %a = shufflevector <2 x float> %x, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+ call void @use_vec3(<3 x float> %a)
+ %b = shufflevector <2 x float> %y, <2 x float> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+ %r = call fast <3 x float> @llvm.fma(<3 x float> %a, <3 x float> splat (float 42.0), <3 x float> %b)
+ ret <3 x float> %r
+}
+
; negative test - must have 3 shuffles
define <2 x float> @fma_unary_shuffle_ops_unshuffled(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index 862853f992968..0325c60997dfd 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -930,6 +930,67 @@ define <2 x i31> @fsh_unary_shuffle_ops_narrowing(<3 x i31> %x, <3 x i31> %y, <3
ret <2 x i31> %r
}
+define <2 x i32> @fsh_unary_shuffle_ops_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const(
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 2, i32 1>, <2 x i32> [[X:%.*]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+ %b = shufflevector <2 x i32> %y, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> %a, <2 x i32> %b)
+ ret <2 x i32> %r
+}
+
+define <2 x i32> @fsh_unary_shuffle_ops_2_const(<2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> <i32 2, i32 1>, <2 x i32> <i32 2, i32 1>, <2 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[X]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x i32> [[A]]
+;
+ %a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+ %r = call <2 x i32> @llvm.fshr(<2 x i32> <i32 1, i32 2>, <2 x i32> <i32 1, i32 2>, <2 x i32> %a)
+ ret <2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_1_const_scalable(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_1_const_scalable(
+; CHECK-NEXT: [[Y:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X:%.*]], <vscale x 2 x i32> [[Y1:%.*]])
+; CHECK-NEXT: [[B:%.*]] = shufflevector <vscale x 2 x i32> [[Y]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 2 x i32> [[B]]
+;
+ %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %b = shufflevector <vscale x 2 x i32> %y, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a, <vscale x 2 x i32> %b)
+ ret <vscale x 2 x i32> %r
+}
+
+define <vscale x 2 x i32> @fsh_unary_shuffle_ops_2_const_scalable(<vscale x 2 x i32> %x) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_2_const_scalable(
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 2 x i32> @llvm.fshr.nxv2i32(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[A:%.*]] = shufflevector <vscale x 2 x i32> [[X]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 2 x i32> [[A]]
+;
+ %a = shufflevector <vscale x 2 x i32> %x, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+ %r = call <vscale x 2 x i32> @llvm.fshr(<vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> splat (i32 42), <vscale x 2 x i32> %a)
+ ret <vscale x 2 x i32> %r
+}
+
+define <3 x i32> @fsh_unary_shuffle_ops_widening_1_const(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @fsh_unary_shuffle_ops_widening_1_const(
+; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT: call void @use_v3(<3 x i32> [[A]])
+; CHECK-NEXT: [[Y:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> splat (i32 42), <2 x i32> [[X]], <2 x i32> [[Y1:%.*]])
+; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x i32> [[Y]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT: ret <3 x i32> [[B]]
+;
+ %a = shufflevector <2 x i32> %x, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+ call void @use_v3(<3 x i32> %a)
+ %b = shufflevector <2 x i32> %y, <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+ %r = call <3 x i32> @llvm.fshr(<3 x i32> splat (i32 42), <3 x i32> %a, <3 x i32> %b)
+ ret <3 x i32> %r
+}
+
; negative test - must have 3 shuffles
define <2 x i32> @fsh_unary_shuffle_ops_unshuffled(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 9a8608da9fd5b..38930956eda2f 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2416,6 +2416,39 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
ret <3 x i8> %r
}
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const(<3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const(
+; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X1:%.*]], <3 x i8> <i8 1, i8 0, i8 2>)
+; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+; CHECK-NEXT: ret <3 x i8> [[SX]]
+;
+ %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
+ %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> <i8 0, i8 1, i8 2>)
+ ret <3 x i8> %r
+}
+
+define <vscale x 3 x i8> @smax_unary_shuffle_ops_lhs_const_scalable(<vscale x 3 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_scalable(
+; CHECK-NEXT: [[R:%.*]] = call <vscale x 3 x i8> @llvm.smax.nxv3i8(<vscale x 3 x i8> [[SX:%.*]], <vscale x 3 x i8> splat (i8 42))
+; CHECK-NEXT: [[R1:%.*]] = shufflevector <vscale x 3 x i8> [[R]], <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 3 x i8> [[R1]]
+;
+ %sx = shufflevector <vscale x 3 x i8> %x, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %r = call <vscale x 3 x i8> @llvm.smax(<vscale x 3 x i8> %sx, <vscale x 3 x i8> splat (i8 42))
+ ret <vscale x 3 x i8> %r
+}
+
+define <3 x i8> @smax_unary_shuffle_ops_lhs_const_widening(<2 x i8> %x) {
+; CHECK-LABEL: @smax_unary_shuffle_ops_lhs_const_widening(
+; CHECK-NEXT: [[X:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X1:%.*]], <2 x i8> <i8 1, i8 0>)
+; CHECK-NEXT: [[SX:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+; CHECK-NEXT: ret <3 x i8> [[SX]]
+;
+ %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 poison>
+ %r = call <3 x i8> @llvm.smax(<3 x i8> %sx, <3 x i8> <i8 0, i8 1, i8 2>)
+ ret <3 x i8> %r
+}
+
; negative test - must have 2 shuffles
define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {
More information about the llvm-commits
mailing list