[llvm] [VectorCombine] Scalarize binop-like intrinsics (PR #138095)

Thu May 1 00:50:45 PDT 2025

https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/138095

>From 1d031ea0234499e5cd42df4933508379a2e286fd Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 15:32:07 +0800
Subject: [PATCH 1/3] Precommit tests

---
 .../RISCV/intrinsic-scalarize.ll              | 93 +++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll

diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
new file mode 100644
index 0000000000000..55b78c4716bc0
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -p vector-combine | FileCheck %s
+
+define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
+; CHECK-LABEL: define <4 x i32> @umax_fixed(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[Y_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> [[Y_INSERT]])
+; CHECK-NEXT:    ret <4 x i32> [[V]]
+;
+  %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %y.insert = insertelement <4 x i32> poison, i32 %y, i32 0
+  %v = call <4 x i32> @llvm.umax(<4 x i32> %x.insert, <4 x i32> %y.insert)
+  ret <4 x i32> %v
+}
+
+define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
+; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[Y_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> [[Y_INSERT]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
+;
+  %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+  %y.insert = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0
+  %v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> %x.insert, <vscale x 4 x i32> %y.insert)
+  ret <vscale x 4 x i32> %v
+}
+
+define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
+; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> [[X_INSERT]])
+; CHECK-NEXT:    ret <4 x i32> [[V]]
+;
+  %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %v = call <4 x i32> @llvm.umax(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> %x.insert)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
+; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+; CHECK-NEXT:    ret <4 x i32> [[V]]
+;
+  %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %v = call <4 x i32> @llvm.umax(<4 x i32> %x.insert, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+  ret <4 x i32> %v
+}
+
+define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> [[X_INSERT]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
+;
+  %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+  %v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> %x.insert)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
+; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> splat (i32 42))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
+;
+  %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
+  %v = call <vscale x 4 x i32> @llvm.umax(<vscale x 4 x i32> %x.insert, <vscale x 4 x i32> splat (i32 42))
+  ret <vscale x 4 x i32> %v
+}
+
+; Shouldn't be scalarized, not a "trivially vectorizable" intrinsic.
+define <4 x i32> @non_trivially_vectorizable(i32 %x, i32 %y) {
+; CHECK-LABEL: define <4 x i32> @non_trivially_vectorizable(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+; CHECK-NEXT:    [[Y_INSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i32 0
+; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v8i32(<4 x i32> [[X_INSERT]], <8 x i32> [[Y_INSERT]])
+; CHECK-NEXT:    ret <4 x i32> [[V]]
+;
+  %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %y.insert = insertelement <8 x i32> poison, i32 %y, i32 0
+  %v = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(<4 x i32> %x.insert, <8 x i32> %y.insert)
+  ret <4 x i32> %v
+}

>From ebfcbe452b7657e54c4c4797b452136afb87a9b3 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 15:32:38 +0800
Subject: [PATCH 2/3] [VectorCombine] Scalarize binop-like intrinsics

---
 .../Transforms/Vectorize/VectorCombine.cpp    | 64 ++++++++++++++-----
 .../RISCV/intrinsic-scalarize.ll              | 32 ++++++----
 2 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 04c084ffdda97..7a7c533267f6f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -48,6 +48,7 @@ STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
 STATISTIC(NumScalarBO, "Number of scalar binops formed");
 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
+STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed");
 
 static cl::opt<bool> DisableVectorCombine(
     "disable-vector-combine", cl::init(false), cl::Hidden,
@@ -1016,21 +1017,29 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
   return true;
 }
 
-/// Match a vector binop or compare instruction with at least one inserted
-/// scalar operand and convert to scalar binop/cmp followed by insertelement.
+/// Match a vector binop, compare or binop-like intrinsic with at least one
+/// inserted scalar operand and convert to scalar binop/cmp/intrinsic followed
+/// by insertelement.
 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
   CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;
   Value *Ins0, *Ins1;
   if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
-      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
-    return false;
+      !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1)))) {
+    if (auto *II = dyn_cast<IntrinsicInst>(&I);
+        II && II->arg_size() == 2 &&
+        isTriviallyVectorizable(II->getIntrinsicID())) {
+      Ins0 = II->getArgOperand(0);
+      Ins1 = II->getArgOperand(1);
+    } else {
+      return false;
+    }
+  }
 
   // Do not convert the vector condition of a vector select into a scalar
   // condition. That may cause problems for codegen because of differences in
   // boolean formats and register-file transfers.
   // TODO: Can we account for that in the cost model?
-  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
-  if (IsCmp)
+  if (isa<CmpInst>(I))
     for (User *U : I.users())
       if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
         return false;
@@ -1085,15 +1094,24 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
 
   unsigned Opcode = I.getOpcode();
   InstructionCost ScalarOpCost, VectorOpCost;
-  if (IsCmp) {
+  if (isa<CmpInst>(I)) {
     CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
     ScalarOpCost = TTI.getCmpSelInstrCost(
         Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred, CostKind);
     VectorOpCost = TTI.getCmpSelInstrCost(
         Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred, CostKind);
-  } else {
+  } else if (isa<BinaryOperator>(I)) {
     ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy, CostKind);
     VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy, CostKind);
+  } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+    IntrinsicCostAttributes ScalarICA(
+        II->getIntrinsicID(), ScalarTy,
+        SmallVector<Type *>(II->arg_size(), ScalarTy));
+    ScalarOpCost = TTI.getIntrinsicInstrCost(ScalarICA, CostKind);
+    IntrinsicCostAttributes VectorICA(
+        II->getIntrinsicID(), VecTy,
+        SmallVector<Type *>(II->arg_size(), VecTy));
+    VectorOpCost = TTI.getIntrinsicInstrCost(VectorICA, CostKind);
   }
 
   // Get cost estimate for the insert element. This cost will factor into
@@ -1112,10 +1130,12 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
 
   // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
   // inselt NewVecC, (scalar_op V0, V1), Index
-  if (IsCmp)
+  if (isa<CmpInst>(I))
     ++NumScalarCmp;
-  else
+  else if (isa<BinaryOperator>(I))
     ++NumScalarBO;
+  else if (isa<IntrinsicInst>(I))
+    ++NumScalarIntrinsic;
 
   // For constant cases, extract the scalar element, this should constant fold.
   if (IsConst0)
@@ -1123,9 +1143,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
   if (IsConst1)
     V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
 
-  Value *Scalar =
-      IsCmp ? Builder.CreateCmp(Pred, V0, V1)
-            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
+  Value *Scalar;
+  if (isa<CmpInst>(I))
+    Scalar = Builder.CreateCmp(Pred, V0, V1);
+  else if (isa<BinaryOperator>(I))
+    Scalar = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
+  else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+    Scalar = Builder.CreateIntrinsic(ScalarTy, II->getIntrinsicID(), {V0, V1});
+  else
+    llvm_unreachable("Unexpected instruction type");
 
   Scalar->setName(I.getName() + ".scalar");
 
@@ -1135,9 +1161,15 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
     ScalarInst->copyIRFlags(&I);
 
   // Fold the vector constants in the original vectors into a new base vector.
-  Value *NewVecC =
-      IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
-            : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+  Value *NewVecC;
+  if (isa<CmpInst>(I))
+    NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
+  else if (isa<BinaryOperator>(I))
+    NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
+  else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+    NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
+  else
+    llvm_unreachable("Unexpected instruction type");
   Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
   replaceValue(I, *Insert);
   return true;
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
index 55b78c4716bc0..5a25f5faf8911 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/intrinsic-scalarize.ll
@@ -4,9 +4,9 @@
 define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
 ; CHECK-LABEL: define <4 x i32> @umax_fixed(
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT:    [[Y_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i32 0
-; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> [[Y_INSERT]])
+; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> poison)
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x i32> [[V]]
 ;
   %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -18,9 +18,9 @@ define <4 x i32> @umax_fixed(i32 %x, i32 %y) {
 define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable(
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
-; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT:    [[Y_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y]], i32 0
-; CHECK-NEXT:    [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> [[Y_INSERT]])
+; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -32,8 +32,9 @@ define <vscale x 4 x i32> @umax_scalable(i32 %x, i32 %y) {
 define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
 ; CHECK-LABEL: define <4 x i32> @umax_fixed_lhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> [[X_INSERT]])
+; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 1, i32 [[X]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> poison)
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x i32> [[V]]
 ;
   %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -44,8 +45,9 @@ define <4 x i32> @umax_fixed_lhs_const(i32 %x) {
 define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
 ; CHECK-LABEL: define <4 x i32> @umax_fixed_rhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT:    [[V:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[X_INSERT]], <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>)
+; CHECK-NEXT:    [[V:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <4 x i32> [[V]]
 ;
   %x.insert = insertelement <4 x i32> poison, i32 %x, i32 0
@@ -56,8 +58,9 @@ define <4 x i32> @umax_fixed_rhs_const(i32 %x) {
 define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_lhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT:    [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> [[X_INSERT]])
+; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 42, i32 [[X]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> splat (i32 42), <vscale x 4 x i32> poison)
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
@@ -68,8 +71,9 @@ define <vscale x 4 x i32> @umax_scalable_lhs_const(i32 %x) {
 define <vscale x 4 x i32> @umax_scalable_rhs_const(i32 %x) {
 ; CHECK-LABEL: define <vscale x 4 x i32> @umax_scalable_rhs_const(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[X_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i32 0
-; CHECK-NEXT:    [[V:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[X_INSERT]], <vscale x 4 x i32> splat (i32 42))
+; CHECK-NEXT:    [[V_SCALAR:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 42)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> splat (i32 42))
+; CHECK-NEXT:    [[V:%.*]] = insertelement <vscale x 4 x i32> [[TMP1]], i32 [[V_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[V]]
 ;
   %x.insert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0

>From 43743048ab11bfd108b377b8d6ba4f6d55472fd9 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 1 May 2025 15:50:24 +0800
Subject: [PATCH 3/3] clang-format

---
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 7a7c533267f6f..4f018f5af03a5 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1167,7 +1167,8 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
   else if (isa<BinaryOperator>(I))
     NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
   else if (auto *II = dyn_cast<IntrinsicInst>(&I))
-    NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
+    NewVecC =
+        Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), {VecC0, VecC1});
   else
     llvm_unreachable("Unexpected instruction type");
   Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);