[llvm] [SDAG] Match BUILD_VECTOR in INSERT_SUBVECTOR to SPLAT_VECTOR fold (PR #163984)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 17 09:05:09 PDT 2025
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/163984
>From 1e605fc103c40ea227314cd2cd24ab86d9cdc8a3 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 17 Oct 2025 15:50:51 +0000
Subject: [PATCH 1/3] Precommit tests
---
.../fixed-subvector-insert-into-scalable.ll | 55 +++++++++++++++++++
1 file changed, 55 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
diff --git a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
new file mode 100644
index 0000000000000..41a3270966f22
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 4 x i32> @insert_div() {
+; CHECK-LABEL: insert_div:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #43691 // =0xaaab
+; CHECK-NEXT: movi v0.4s, #9
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
+ %div = udiv <vscale x 4 x i32> %0, splat (i32 3)
+ ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 4 x i32> @insert_mul() {
+; CHECK-LABEL: insert_mul:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: mul z0.s, z0.s, #7
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
+ %mul = mul <vscale x 4 x i32> %0, splat (i32 7)
+ ret <vscale x 4 x i32> %mul
+}
+
+define <vscale x 4 x i32> @insert_add() {
+; CHECK-LABEL: insert_add:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.4s, #5
+; CHECK-NEXT: add z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
+ %add = add <vscale x 4 x i32> %0, splat (i32 11)
+ ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 4 x i32> @insert_sub() {
+; CHECK-LABEL: insert_sub:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.4s, #11
+; CHECK-NEXT: sub z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)
+ %sub = add <vscale x 4 x i32> %0, splat (i32 -11)
+ ret <vscale x 4 x i32> %sub
+}
>From 76bfc78b0570929581be4e6fbe547e28b413c11e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 17 Oct 2025 15:52:08 +0000
Subject: [PATCH 2/3] [SDAG] Match BUILD_VECTOR in INSERT_SUBVECTOR to
SPLAT_VECTOR fold
This allows for more constant folding when inserting fixed-length vector
splats into scalable vectors.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++++--
.../fixed-subvector-insert-into-scalable.ll | 17 ++++-------------
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 2 +-
llvm/test/CodeGen/X86/pr35443.ll | 2 +-
4 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d64d455..8e74bcb25da63 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -28014,9 +28014,15 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
// Simplify scalar inserts into an undef vector:
// insert_subvector undef, (splat X), N2 -> splat X
- if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
- if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
- return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+ auto *BV0 = dyn_cast<BuildVectorSDNode>(N1);
+ if (N0.isUndef() && (N1.getOpcode() == ISD::SPLAT_VECTOR || BV0)) {
+ SDValue Splat = BV0 ? BV0->getSplatValue() : N1.getOperand(0);
+ if (Splat &&
+ (N1.hasOneUse() || (!BV0 && DAG.isConstantValueOfAnyType(Splat))))
+ // Splat the value computed above, not N1's operand 0: for a BUILD_VECTOR
+ // operand 0 is only the first element and may be undef.
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Splat);
+ }
// insert_subvector (splat X), (splat X), N2 -> splat X
if (N0.getOpcode() == ISD::SPLAT_VECTOR && N0.getOpcode() == N1.getOpcode() &&
diff --git a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
index 41a3270966f22..5ce5baf42f5f5 100644
--- a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
@@ -4,13 +4,7 @@
define <vscale x 4 x i32> @insert_div() {
; CHECK-LABEL: insert_div:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #43691 // =0xaaab
-; CHECK-NEXT: movi v0.4s, #9
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: movk w8, #43690, lsl #16
-; CHECK-NEXT: mov z1.s, w8
-; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: lsr z0.s, z0.s, #1
+; CHECK-NEXT: mov z0.s, #3 // =0x3
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
@@ -21,8 +15,7 @@ entry:
define <vscale x 4 x i32> @insert_mul() {
; CHECK-LABEL: insert_mul:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.4s, #1
-; CHECK-NEXT: mul z0.s, z0.s, #7
+; CHECK-NEXT: mov z0.s, #7 // =0x7
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
@@ -33,8 +26,7 @@ entry:
define <vscale x 4 x i32> @insert_add() {
; CHECK-LABEL: insert_add:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.4s, #5
-; CHECK-NEXT: add z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT: mov z0.s, #16 // =0x10
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
@@ -45,8 +37,7 @@ entry:
define <vscale x 4 x i32> @insert_sub() {
; CHECK-LABEL: insert_sub:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: movi v0.4s, #11
-; CHECK-NEXT: sub z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
entry:
%0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 2d0df562b9a4b..f56e1680f79c6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4778,7 +4778,7 @@ entry:
define i64 @extract_scalable(<2 x i32> %0) "target-features"="+sve2" {
; CHECK-SD-LABEL: extract_scalable:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v1.2s, #1
+; CHECK-SD-NEXT: mov z1.s, #1 // =0x1
; CHECK-SD-NEXT: ptrue p0.s, vl2
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-SD-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
diff --git a/llvm/test/CodeGen/X86/pr35443.ll b/llvm/test/CodeGen/X86/pr35443.ll
index 430a1380c7c8c..8438b73acac23 100644
--- a/llvm/test/CodeGen/X86/pr35443.ll
+++ b/llvm/test/CodeGen/X86/pr35443.ll
@@ -8,7 +8,7 @@
define void @pr35443() {
; CHECK-LABEL: pr35443:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpbroadcastb ac+4(%rip), %xmm0
+; CHECK-NEXT: vpbroadcastb ac+4(%rip), %ymm0
; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubq %ymm0, %ymm1, %ymm0
>From 67fb3ea85dc805832478eacf39f5797b145b6169 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 17 Oct 2025 16:04:08 +0000
Subject: [PATCH 3/3] undef > poison
---
.../AArch64/fixed-subvector-insert-into-scalable.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
index 5ce5baf42f5f5..8758f5a4e244d 100644
--- a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
@@ -7,7 +7,7 @@ define <vscale x 4 x i32> @insert_div() {
; CHECK-NEXT: mov z0.s, #3 // =0x3
; CHECK-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 9), i64 0)
%div = udiv <vscale x 4 x i32> %0, splat (i32 3)
ret <vscale x 4 x i32> %div
}
@@ -18,7 +18,7 @@ define <vscale x 4 x i32> @insert_mul() {
; CHECK-NEXT: mov z0.s, #7 // =0x7
; CHECK-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 1), i64 0)
%mul = mul <vscale x 4 x i32> %0, splat (i32 7)
ret <vscale x 4 x i32> %mul
}
@@ -29,7 +29,7 @@ define <vscale x 4 x i32> @insert_add() {
; CHECK-NEXT: mov z0.s, #16 // =0x10
; CHECK-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 5), i64 0)
%add = add <vscale x 4 x i32> %0, splat (i32 11)
ret <vscale x 4 x i32> %add
}
@@ -40,7 +40,7 @@ define <vscale x 4 x i32> @insert_sub() {
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)
+ %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> splat (i32 11), i64 0)
%sub = add <vscale x 4 x i32> %0, splat (i32 -11)
ret <vscale x 4 x i32> %sub
}
More information about the llvm-commits
mailing list