[llvm] [SDAG] Teach FoldConstantArithmetic to match splats inserted into vectors (PR #163984)

Mon Oct 20 03:08:23 PDT 2025

https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/163984

>From 1e605fc103c40ea227314cd2cd24ab86d9cdc8a3 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 17 Oct 2025 15:50:51 +0000
Subject: [PATCH 1/3] Precommit tests

---
 .../fixed-subvector-insert-into-scalable.ll   | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll

diff --git a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
new file mode 100644
index 0000000000000..41a3270966f22
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 4 x i32> @insert_div() {
+; CHECK-LABEL: insert_div:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
+; CHECK-NEXT:    movi v0.4s, #9
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
+  %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_mul() {
+; CHECK-LABEL: insert_mul:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.4s, #1
+; CHECK-NEXT:    mul z0.s, z0.s, #7
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
+  %mul = mul <vscale x 4 x i32> %0, splat (i32 7)
+  ret <vscale x 4 x i32> %mul
+}
+
+define <vscale x 4 x i32> @insert_add() {
+; CHECK-LABEL: insert_add:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.4s, #5
+; CHECK-NEXT:    add z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
+  %add = add <vscale x 4 x i32> %0, splat (i32 11)
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @insert_sub() {
+; CHECK-LABEL: insert_sub:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.4s, #11
+; CHECK-NEXT:    sub z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)
+  %sub = add <vscale x 4 x i32> %0, splat (i32 -11)
+  ret <vscale x 4 x i32> %sub
+}

>From addf8789c345f21b116a22d55eb7ea46be8e35fb Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 20 Oct 2025 09:54:14 +0000
Subject: [PATCH 2/3] [SDAG] Teach FoldConstantArithmetic to match splats
 inserted into vectors

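This teaches FoldConstantArithmetic to match
`insert_subvector undef, (splat X), N2` (where N2 is the insertion index)
as a splat of X. This is valid because every lane not covered by the
inserted subvector is undef, so it may be assumed to hold X as well. This
pattern can occur for scalable vectors when a fixed-length splat is
inserted into an undef vector.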

This allows the cases in `fixed-subvector-insert-into-scalable.ll` to be
constant-folded (where previously they would all be computed at runtime).
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 +++++++++++++++----
 .../fixed-subvector-insert-into-scalable.ll   | 17 +++------
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 90edaf3ef5471..7e7810f10af4b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7338,16 +7338,23 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
            Op.getValueType().getVectorElementCount() == NumElts;
   };
 
-  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
+  // UNDEF: folds to undef
+  // BUILD_VECTOR: may have constant elements
+  // SPLAT_VECTOR: could be a splat of a constant
+  // INSERT_SUBVECTOR: could be inserting a constant splat into an undef vector
+  // - This pattern occurs when a fixed-length vector splat is inserted into
+  //   a scalable vector
+  auto VectorOpMayConstantFold = [](const SDValue &Op) {
     return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
            Op.getOpcode() == ISD::BUILD_VECTOR ||
-           Op.getOpcode() == ISD::SPLAT_VECTOR;
+           Op.getOpcode() == ISD::SPLAT_VECTOR ||
+           Op.getOpcode() == ISD::INSERT_SUBVECTOR;
   };
 
   // All operands must be vector types with the same number of elements as
   // the result type and must be either UNDEF or a build/splat vector
   // or UNDEF scalars.
-  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
+  if (!llvm::all_of(Ops, VectorOpMayConstantFold) ||
       !llvm::all_of(Ops, IsScalarOrSameVectorSize))
     return SDValue();
 
@@ -7374,14 +7381,28 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   // a combination of BUILD_VECTOR and SPLAT_VECTOR.
   unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
 
+  // Preprocess insert_subvector to avoid repeatedly matching the splat.
+  SmallVector<SDValue, 4> PreprocessedOps;
+  for (SDValue Op : Ops) {
+    if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
+      // match: `insert_subvector undef, (splat X), N2` as `splat X`
+      SDValue N0 = Op.getOperand(0);
+      auto* BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+      if (!N0.isUndef() || !BV || !(Op = BV->getSplatValue()))
+          return SDValue();
+    }
+    PreprocessedOps.push_back(Op);
+  }
+
   // Constant fold each scalar lane separately.
   SmallVector<SDValue, 4> ScalarResults;
   for (unsigned I = 0; I != NumVectorElts; I++) {
     SmallVector<SDValue, 4> ScalarOps;
-    for (SDValue Op : Ops) {
+    for (SDValue Op : PreprocessedOps) {
       EVT InSVT = Op.getValueType().getScalarType();
       if (Op.getOpcode() != ISD::BUILD_VECTOR &&
-          Op.getOpcode() != ISD::SPLAT_VECTOR) {
+          Op.getOpcode() != ISD::SPLAT_VECTOR &&
+          Op.getOpcode() != ISD::INSERT_SUBVECTOR) {
         if (Op.isUndef())
           ScalarOps.push_back(getUNDEF(InSVT));
         else
@@ -7389,7 +7410,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
         continue;
       }
 
-      SDValue ScalarOp =
+      // insert_subvector has been preprocessed, so if it was of the form
+      // `insert_subvector undef, (splat X), N2`, it has been replaced with the
+      // splat value (X).
+      SDValue ScalarOp = Op.getOpcode() == ISD::INSERT_SUBVECTOR ? Op :
           Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
       EVT ScalarVT = ScalarOp.getValueType();
 
diff --git a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
index 41a3270966f22..5ce5baf42f5f5 100644
--- a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
@@ -4,13 +4,7 @@
 define <vscale x 4 x i32> @insert_div() {
 ; CHECK-LABEL: insert_div:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #43691 // =0xaaab
-; CHECK-NEXT:    movi v0.4s, #9
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    movk w8, #43690, lsl #16
-; CHECK-NEXT:    mov z1.s, w8
-; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    mov z0.s, #3 // =0x3
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
@@ -21,8 +15,7 @@ entry:
 define <vscale x 4 x i32> @insert_mul() {
 ; CHECK-LABEL: insert_mul:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.4s, #1
-; CHECK-NEXT:    mul z0.s, z0.s, #7
+; CHECK-NEXT:    mov z0.s, #7 // =0x7
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
@@ -33,8 +26,7 @@ entry:
 define <vscale x 4 x i32> @insert_add() {
 ; CHECK-LABEL: insert_add:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.4s, #5
-; CHECK-NEXT:    add z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT:    mov z0.s, #16 // =0x10
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
@@ -45,8 +37,7 @@ entry:
 define <vscale x 4 x i32> @insert_sub() {
 ; CHECK-LABEL: insert_sub:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.4s, #11
-; CHECK-NEXT:    sub z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)

>From 7d18495d64819a7aca05332f26d9bea52be6a198 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 20 Oct 2025 10:08:07 +0000
Subject: [PATCH 3/3] Format

---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7e7810f10af4b..b283d4ddd907a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7387,9 +7387,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
       // match: `insert_subvector undef, (splat X), N2` as `splat X`
       SDValue N0 = Op.getOperand(0);
-      auto* BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+      auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
       if (!N0.isUndef() || !BV || !(Op = BV->getSplatValue()))
-          return SDValue();
+        return SDValue();
     }
     PreprocessedOps.push_back(Op);
   }
@@ -7413,8 +7413,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
       // insert_subvector has been preprocessed, so if it was of the form
       // `insert_subvector undef, (splat X), N2`, it has been replaced with the
       // splat value (X).
-      SDValue ScalarOp = Op.getOpcode() == ISD::INSERT_SUBVECTOR ? Op :
-          Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
+      SDValue ScalarOp =
+          Op.getOpcode() == ISD::INSERT_SUBVECTOR
+              ? Op
+              : Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
       EVT ScalarVT = ScalarOp.getValueType();
 
       // Build vector (integer) scalar operands may need implicit