[llvm] [InstCombine] Constant fold binops through `vector.insert` (PR #164624)

Benjamin Maxwell via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 23 05:39:52 PDT 2025


https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/164624

From 513b92b7a54c93f33de21cadd7eaf909c7c8e9ce Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 22 Oct 2025 13:15:00 +0000
Subject: [PATCH 1/7] Precommit tests

---
 .../InstCombine/constant-vector-insert.ll     | 137 ++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/constant-vector-insert.ll

diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
new file mode 100644
index 0000000000000..088cd8702ccb7
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine %s | FileCheck %s
+
+define <vscale x 4 x i32> @insert_div() {
+; CHECK-LABEL: @insert_div(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[TMP0]], splat (i32 3)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_div_splat_lhs() {
+; CHECK-LABEL: @insert_div_splat_lhs(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> splat (i32 3), [[TMP0]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
+  %div = udiv <vscale x 4 x i32> splat (i32 3), %0
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_div_mixed_splat() {
+; CHECK-LABEL: @insert_div_mixed_splat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[TMP0]], splat (i32 3)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_mul() {
+; CHECK-LABEL: @insert_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
+; CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[TMP0]], splat (i32 7)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
+  %mul = mul <vscale x 4 x i32> %0, splat (i32 7)
+  ret <vscale x 4 x i32> %mul
+}
+
+define <vscale x 4 x i32> @insert_add() {
+; CHECK-LABEL: @insert_add(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], splat (i32 11)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
+  %add = add <vscale x 4 x i32> %0, splat (i32 11)
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 8 x i32> @insert_add_scalable_subvector() {
+; CHECK-LABEL: @insert_add_scalable_subvector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i32> [[TMP0]], splat (i32 4)
+; CHECK-NEXT:    ret <vscale x 8 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat(i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
+  %add = add <vscale x 8 x i32> %0, splat (i32 4)
+  ret <vscale x 8 x i32> %add
+}
+
+define <vscale x 4 x i32> @insert_sub() {
+; CHECK-LABEL: @insert_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
+; CHECK-NEXT:    [[SUB:%.*]] = add <vscale x 4 x i32> [[TMP0]], splat (i32 -11)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
+  %sub = add <vscale x 4 x i32> %0, splat (i32 -11)
+  ret <vscale x 4 x i32> %sub
+}
+
+define <vscale x 4 x i32> @insert_and_partially_undef() {
+; CHECK-LABEL: @insert_and_partially_undef(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
+; CHECK-NEXT:    [[AND:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 4)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[AND]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
+  %and = and <vscale x 4 x i32> %0, splat (i32 4)
+  ret <vscale x 4 x i32> %and
+}
+
+define <vscale x 4 x i32> @insert_fold_chain() {
+; CHECK-LABEL: @insert_fold_chain(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[TMP0]], splat (i32 3)
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw <vscale x 4 x i32> [[DIV]], splat (i32 4)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  %add = add <vscale x 4 x i32> %div, splat (i32 4)
+  ret <vscale x 4 x i32> %add
+}
+
+; TODO: This could be folded more.
+define <vscale x 4 x i32> @insert_add_both_insert_vector() {
+; CHECK-LABEL: @insert_add_both_insert_vector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 5), i64 0)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 -1), <4 x i32> splat (i32 2), i64 0)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 10), <4 x i32> splat (i32 5), i64 0)
+  %1 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 -1), <4 x i32> splat (i32 2), i64 0)
+  %add = add <vscale x 4 x i32> %0, %1
+  ret <vscale x 4 x i32> %add
+}

From 976fc0b6bde584c4909086da8281b565eddb88d4 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 22 Oct 2025 13:16:08 +0000
Subject: [PATCH 2/7] [InstCombine] Constant fold binops through
 `vector.insert`

This patch improves constant folding through `llvm.vector.insert`. It
does not change anything for fixed-length vectors (which can already be
folded to ConstantVectors in these cases), but enables folds for
scalable vectors that would otherwise be left unfolded.

These folds preserve the destination vector (which could be undef or
poison), giving targets more freedom in lowering the operations.
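
For illustration, a minimal before/after sketch of the fold (the
constant values here are made up, not taken from the tests):

  ; Before: the binop on the scalable operand cannot fold to a
  ; ConstantVector.
  %v = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 8), <4 x i32> splat (i32 6), i64 0)
  %r = add <vscale x 4 x i32> %v, splat (i32 2)

  ; After: the splat is applied to both the destination and the
  ; subvector, leaving a single constant vector.insert.
  %r = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 10), <4 x i32> splat (i32 8), i64 0)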
---
 .../InstCombine/InstructionCombining.cpp      | 56 +++++++++++++++++++
 .../InstCombine/constant-vector-insert.ll     | 28 +++-------
 2 files changed, 65 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3f11cae143b81..05e8673b25433 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2323,6 +2323,32 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
   return ConstantVector::get(NewVecC);
 }
 
+// Match a vector.insert where both the destination and subvector are constant.
+static bool matchConstantSubVector(Value *V, Constant *&Dest,
+                                   Constant *&SubVector, Value *&Idx) {
+  return match(V, m_Intrinsic<Intrinsic::vector_insert>(
+                      m_Constant(Dest), m_Constant(SubVector), m_Value(Idx)));
+}
+
+static Constant *matchConstantSplat(Value *V) {
+  Constant *C;
+  if (match(V, m_Constant(C)))
+    return C->getSplatValue();
+  return nullptr;
+}
+
+// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
+static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
+                                            Constant *Splat, bool SplatLHS,
+                                            const DataLayout &DL) {
+  ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
+  Constant *LHS = ConstantVector::getSplat(EC, Splat);
+  Constant *RHS = Vector;
+  if (!SplatLHS)
+    std::swap(LHS, RHS);
+  return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+}
+
 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   if (!isa<VectorType>(Inst.getType()))
     return nullptr;
@@ -2334,6 +2360,36 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   assert(cast<VectorType>(RHS->getType())->getElementCount() ==
          cast<VectorType>(Inst.getType())->getElementCount());
 
+  auto foldConstantsThroughSubVectorInsert =
+      [&](Constant *Dest, Value *DestIdx, Type *SubVecType, Constant *SubVector,
+          Constant *Splat, bool SplatLHS) -> Instruction * {
+    SubVector =
+        constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
+    Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
+    if (!SubVector || !Dest)
+      return nullptr;
+    auto *InsertVector =
+        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, DestIdx);
+    InsertVector->removeFromParent();
+    return InsertVector;
+  };
+
+  // If one operand is a constant splat and the other operand is a
+  // `vector.insert` where both the destination and subvector are constant,
+  // apply the operation to both the destination and subvector, returning a new
+  // constant `vector.insert`. This helps constant folding for scalable vectors.
+  for (bool SwapOperands : {false, true}) {
+    Value *Idx, *MaybeSubVector = LHS, *MaybeSplat = RHS;
+    if (SwapOperands)
+      std::swap(MaybeSplat, MaybeSubVector);
+    Constant *SubVector, *Dest, *Splat;
+    if (matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx) &&
+        (Splat = matchConstantSplat(MaybeSplat)))
+      return foldConstantsThroughSubVectorInsert(
+          Dest, Idx, SubVector->getType(), SubVector, Splat,
+          /*SplatLHS=*/SwapOperands);
+  }
+
   // If both operands of the binop are vector concatenations, then perform the
   // narrow binop on each pair of the source operands followed by concatenation
   // of the results.
diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
index 088cd8702ccb7..aa1ac1eac3ba0 100644
--- a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -4,8 +4,7 @@
 define <vscale x 4 x i32> @insert_div() {
 ; CHECK-LABEL: @insert_div(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[TMP0]], splat (i32 3)
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 3), i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
 entry:
@@ -17,8 +16,7 @@ entry:
 define <vscale x 4 x i32> @insert_div_splat_lhs() {
 ; CHECK-LABEL: @insert_div_splat_lhs(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> splat (i32 3), [[TMP0]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
 entry:
@@ -30,8 +28,7 @@ entry:
 define <vscale x 4 x i32> @insert_div_mixed_splat() {
 ; CHECK-LABEL: @insert_div_mixed_splat(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 18), <4 x i32> splat (i32 9), i64 0)
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[TMP0]], splat (i32 3)
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 6), <4 x i32> splat (i32 3), i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
 entry:
@@ -43,8 +40,7 @@ entry:
 define <vscale x 4 x i32> @insert_mul() {
 ; CHECK-LABEL: @insert_mul(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 4)
-; CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[TMP0]], splat (i32 7)
+; CHECK-NEXT:    [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 7), i64 4)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
 ;
 entry:
@@ -56,8 +52,7 @@ entry:
 define <vscale x 4 x i32> @insert_add() {
 ; CHECK-LABEL: @insert_add(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[TMP0]], splat (i32 11)
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 16), i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
 entry:
@@ -69,8 +64,7 @@ entry:
 define <vscale x 8 x i32> @insert_add_scalable_subvector() {
 ; CHECK-LABEL: @insert_add_scalable_subvector(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 16), <vscale x 4 x i32> splat (i32 -8), i64 0)
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i32> [[TMP0]], splat (i32 4)
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> splat (i32 20), <vscale x 4 x i32> splat (i32 -4), i64 0)
 ; CHECK-NEXT:    ret <vscale x 8 x i32> [[ADD]]
 ;
 entry:
@@ -82,8 +76,7 @@ entry:
 define <vscale x 4 x i32> @insert_sub() {
 ; CHECK-LABEL: @insert_sub(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 8)
-; CHECK-NEXT:    [[SUB:%.*]] = add <vscale x 4 x i32> [[TMP0]], splat (i32 -11)
+; CHECK-NEXT:    [[SUB:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 8)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
 ;
 entry:
@@ -95,8 +88,7 @@ entry:
 define <vscale x 4 x i32> @insert_and_partially_undef() {
 ; CHECK-LABEL: @insert_and_partially_undef(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 6), i64 0)
-; CHECK-NEXT:    [[AND:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 4)
+; CHECK-NEXT:    [[AND:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> zeroinitializer, <4 x i32> splat (i32 4), i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[AND]]
 ;
 entry:
@@ -108,9 +100,7 @@ entry:
 define <vscale x 4 x i32> @insert_fold_chain() {
 ; CHECK-LABEL: @insert_fold_chain(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 21), <4 x i32> splat (i32 12), i64 0)
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[TMP0]], splat (i32 3)
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw <vscale x 4 x i32> [[DIV]], splat (i32 4)
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 11), <4 x i32> splat (i32 8), i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
 entry:

From ffb21aa316c9f0bea1d68ecbf28c123b47ff046c Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 23 Oct 2025 08:47:06 +0000
Subject: [PATCH 3/7] Improve test case

---
 llvm/test/Transforms/InstCombine/constant-vector-insert.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
index aa1ac1eac3ba0..821e66e939162 100644
--- a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -16,12 +16,12 @@ entry:
 define <vscale x 4 x i32> @insert_div_splat_lhs() {
 ; CHECK-LABEL: @insert_div_splat_lhs(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> zeroinitializer, i64 0)
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat (i32 5), <4 x i32> splat (i32 2), i64 0)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
 entry:
-  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
-  %div = udiv <vscale x 4 x i32> splat (i32 3), %0
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> splat(i32 2), <4 x i32> splat (i32 5), i64 0)
+  %div = udiv <vscale x 4 x i32> splat (i32 10), %0
   ret <vscale x 4 x i32> %div
 }
 

From 19c937bda85e9859645e4c0fcba34a162934af24 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 23 Oct 2025 09:18:11 +0000
Subject: [PATCH 4/7] Address comments

---
 .../InstCombine/InstructionCombining.cpp      | 33 +++++++++----------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 05e8673b25433..3757c4198b841 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2360,35 +2360,34 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   assert(cast<VectorType>(RHS->getType())->getElementCount() ==
          cast<VectorType>(Inst.getType())->getElementCount());
 
-  auto foldConstantsThroughSubVectorInsert =
-      [&](Constant *Dest, Value *DestIdx, Type *SubVecType, Constant *SubVector,
-          Constant *Splat, bool SplatLHS) -> Instruction * {
+  auto foldConstantsThroughSubVectorInsertSplat =
+      [&](Value *MaybeSubVector, Value *MaybeSplat,
+          bool SplatLHS) -> Instruction * {
+    Value *Idx;
+    Constant *SubVector, *Dest, *Splat;
+    Splat = matchConstantSplat(MaybeSplat);
+    if (!Splat || !matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx))
+      return nullptr;
     SubVector =
         constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
     Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
     if (!SubVector || !Dest)
       return nullptr;
     auto *InsertVector =
-        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, DestIdx);
-    InsertVector->removeFromParent();
-    return InsertVector;
+        Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
+    return replaceInstUsesWith(Inst, InsertVector);
   };
 
   // If one operand is a constant splat and the other operand is a
   // `vector.insert` where both the destination and subvector are constant,
   // apply the operation to both the destination and subvector, returning a new
   // constant `vector.insert`. This helps constant folding for scalable vectors.
-  for (bool SwapOperands : {false, true}) {
-    Value *Idx, *MaybeSubVector = LHS, *MaybeSplat = RHS;
-    if (SwapOperands)
-      std::swap(MaybeSplat, MaybeSubVector);
-    Constant *SubVector, *Dest, *Splat;
-    if (matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx) &&
-        (Splat = matchConstantSplat(MaybeSplat)))
-      return foldConstantsThroughSubVectorInsert(
-          Dest, Idx, SubVector->getType(), SubVector, Splat,
-          /*SplatLHS=*/SwapOperands);
-  }
+  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+          /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
+    return Folded;
+  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
+          /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
+    return Folded;
 
   // If both operands of the binop are vector concatenations, then perform the
   // narrow binop on each pair of the source operands followed by concatenation

From 827f14bd57b852a21019f9f24c5f9aed14fba6f7 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 23 Oct 2025 10:08:53 +0000
Subject: [PATCH 5/7] Add test with non-splat subvector

---
 .../Transforms/InstCombine/constant-vector-insert.ll | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
index 821e66e939162..8d6969b3eac58 100644
--- a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -61,6 +61,18 @@ entry:
   ret <vscale x 4 x i32> %add
 }
 
+define <vscale x 4 x i32> @insert_add_non_splat_subvector() {
+; CHECK-LABEL: @insert_add_non_splat_subvector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 101, i32 102, i32 103, i32 104>, i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i64 0)
+  %add = add <vscale x 4 x i32> %0, splat (i32 100)
+  ret <vscale x 4 x i32> %add
+}
+
 define <vscale x 8 x i32> @insert_add_scalable_subvector() {
 ; CHECK-LABEL: @insert_add_scalable_subvector(
 ; CHECK-NEXT:  entry:

From a65d44212642854fd8815ecd65b0b4055c39ab5e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 23 Oct 2025 12:23:26 +0000
Subject: [PATCH 6/7] Fixups

---
 llvm/include/llvm/IR/PatternMatch.h           | 25 +++++++++++++++++++
 .../InstCombine/InstructionCombining.cpp      | 22 ++++------------
 .../InstCombine/constant-vector-insert.ll     | 17 +++++++++++++
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 99f70b101c2ed..75f677f097c31 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -198,6 +198,25 @@ struct constantexpr_match {
 /// expression.
 inline constantexpr_match m_ConstantExpr() { return constantexpr_match(); }
 
+template <typename SubPattern_t> struct Splat_match {
+  SubPattern_t SubPattern;
+  Splat_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (auto *C = dyn_cast<Constant>(V)) {
+      auto *Splat = C->getSplatValue();
+      return Splat ? SubPattern.match(Splat) : false;
+    }
+    // TODO: Extend to other cases (e.g. shufflevectors).
+    return false;
+  }
+};
+
+/// Match a splat. This is currently limited to constant splats.
+template <typename T> inline Splat_match<T> m_Splat(const T &SubPattern) {
+  return SubPattern;
+}
+
 /// Match an arbitrary basic block value and ignore it.
 inline class_match<BasicBlock> m_BasicBlock() {
   return class_match<BasicBlock>();
@@ -2925,6 +2944,12 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_VecReverse(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::vector_reverse>(Op0);
 }
 
+template <typename Opnd0, typename Opnd1, typename Opnd2>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
+m_VectorInsert(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
+  return m_Intrinsic<Intrinsic::vector_insert>(Op0, Op1, Op2);
+}
+
 //===----------------------------------------------------------------------===//
 // Matchers for two-operands operators with the operators in either order
 //
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 3757c4198b841..4a38a33e74eab 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2323,20 +2323,6 @@ Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
   return ConstantVector::get(NewVecC);
 }
 
-// Match a vector.insert where both the destination and subvector are constant.
-static bool matchConstantSubVector(Value *V, Constant *&Dest,
-                                   Constant *&SubVector, Value *&Idx) {
-  return match(V, m_Intrinsic<Intrinsic::vector_insert>(
-                      m_Constant(Dest), m_Constant(SubVector), m_Value(Idx)));
-}
-
-static Constant *matchConstantSplat(Value *V) {
-  Constant *C;
-  if (match(V, m_Constant(C)))
-    return C->getSplatValue();
-  return nullptr;
-}
-
 // Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
 static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
                                             Constant *Splat, bool SplatLHS,
@@ -2364,9 +2350,11 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
       [&](Value *MaybeSubVector, Value *MaybeSplat,
           bool SplatLHS) -> Instruction * {
     Value *Idx;
-    Constant *SubVector, *Dest, *Splat;
-    Splat = matchConstantSplat(MaybeSplat);
-    if (!Splat || !matchConstantSubVector(MaybeSubVector, Dest, SubVector, Idx))
+    Constant *Splat, *SubVector, *Dest;
+    if (!match(MaybeSplat, m_Splat(m_Constant(Splat))) ||
+        !match(MaybeSubVector,
+               m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
+                              m_Value(Idx))))
       return nullptr;
     SubVector =
         constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
diff --git a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
index 8d6969b3eac58..268854054bd7f 100644
--- a/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
+++ b/llvm/test/Transforms/InstCombine/constant-vector-insert.ll
@@ -1,5 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -passes=instcombine %s | FileCheck %s
+; RUN: opt -S -passes=instcombine %s \
+; RUN:   -use-constant-int-for-fixed-length-splat \
+; RUN:   -use-constant-fp-for-fixed-length-splat \
+; RUN:   -use-constant-int-for-scalable-splat \
+; RUN:   -use-constant-fp-for-scalable-splat | FileCheck %s
 
 define <vscale x 4 x i32> @insert_div() {
 ; CHECK-LABEL: @insert_div(
@@ -73,6 +78,18 @@ entry:
   ret <vscale x 4 x i32> %add
 }
 
+define <vscale x 4 x float> @insert_add_fp() {
+; CHECK-LABEL: @insert_add_fp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> splat (float 6.250000e+00), <4 x float> splat (float 5.500000e+00), i64 0)
+; CHECK-NEXT:    ret <vscale x 4 x float> [[ADD]]
+;
+entry:
+  %0 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> splat(float 1.25), <4 x float> splat (float 0.5), i64 0)
+  %add = fadd <vscale x 4 x float> %0, splat (float 5.0)
+  ret <vscale x 4 x float> %add
+}
+
 define <vscale x 8 x i32> @insert_add_scalable_subvector() {
 ; CHECK-LABEL: @insert_add_scalable_subvector(
 ; CHECK-NEXT:  entry:

From 7c522b24e6f72b00acdd6c80e2984551778c7b2f Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 23 Oct 2025 12:39:07 +0000
Subject: [PATCH 7/7] Rename matcher

---
 llvm/include/llvm/IR/PatternMatch.h                      | 5 +++--
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 75f677f097c31..e3ec7e1764da7 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -212,8 +212,9 @@ template <typename SubPattern_t> struct Splat_match {
   }
 };
 
-/// Match a splat. This is currently limited to constant splats.
-template <typename T> inline Splat_match<T> m_Splat(const T &SubPattern) {
+/// Match a constant splat. TODO: Extend this to non-constant splats.
+template <typename T>
+inline Splat_match<T> m_ConstantSplat(const T &SubPattern) {
   return SubPattern;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4a38a33e74eab..becc1888152d7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2351,7 +2351,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
           bool SplatLHS) -> Instruction * {
     Value *Idx;
     Constant *Splat, *SubVector, *Dest;
-    if (!match(MaybeSplat, m_Splat(m_Constant(Splat))) ||
+    if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
         !match(MaybeSubVector,
                m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
                               m_Value(Idx))))
