[llvm] 7d3af9e - [DAGCombine] insert_subvector undef, (splat X), N2 -> splat X

Wed Apr 6 09:26:44 PDT 2022

Author: Paul Walker
Date: 2022-04-06T17:15:38+01:00
New Revision: 7d3af9ef0fee981fdff80d11149399ce81842990

URL: https://github.com/llvm/llvm-project/commit/7d3af9ef0fee981fdff80d11149399ce81842990
DIFF: https://github.com/llvm/llvm-project/commit/7d3af9ef0fee981fdff80d11149399ce81842990.diff

LOG: [DAGCombine] insert_subvector undef, (splat X), N2 -> splat X

Differential Revision: https://reviews.llvm.org/D120328

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7bf89ca1580d4..0e4bb65818434 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22694,6 +22694,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
     return N1.getOperand(0);
 
+  // Simplify scalar inserts into an undef vector:
+  // insert_subvector undef, (splat X), N2 -> splat X
+  if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
+    return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+
   // If we are inserting a bitcast value into an undef, with the same
   // number of elements, just use the bitcast input of the extract.
   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->

diff  --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index d717585d4e480..86e06a64ebb3a 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -573,6 +573,42 @@ define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_poison(<vscale x 4 x i1> %
   ret <vscale x 16 x i1> %v0
 }
 
+; Test constant predicate insert into undef
+define <vscale x 2 x i1> @insert_nxv2i1_v8i1_const_true_into_undef() vscale_range(4,8) {
+; CHECK-LABEL: insert_nxv2i1_v8i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1 (<vscale x 2 x i1> undef, <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+  ret <vscale x 2 x i1> %v0
+}
+
+define <vscale x 4 x i1> @insert_nxv4i1_v16i1_const_true_into_undef() vscale_range(4,8) {
+; CHECK-LABEL: insert_nxv4i1_v16i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1 (<vscale x 4 x i1> undef, <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+  ret <vscale x 4 x i1> %v0
+}
+
+define <vscale x 8 x i1> @insert_nxv8i1_v32i1_const_true_into_undef() vscale_range(4,8) {
+; CHECK-LABEL: insert_nxv8i1_v32i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1 (<vscale x 8 x i1> undef, <32 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+  ret <vscale x 8 x i1> %v0
+}
+
+define <vscale x 16 x i1> @insert_nxv16i1_v64i1_const_true_into_undef() vscale_range(4,8) {
+; CHECK-LABEL: insert_nxv16i1_v64i1_const_true_into_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1 (<vscale x 16 x i1> undef, <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+  ret <vscale x 16 x i1> %v0
+}
 
 declare <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32>, <vscale x 2 x i32>, i64)
 declare <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float>, <vscale x 2 x float>, i64)
@@ -585,5 +621,9 @@ declare <vscale x 4 x bfloat> @llvm.experimental.vector.insert.nxv4bf16.nxv4bf16
 declare <vscale x 4 x bfloat> @llvm.experimental.vector.insert.nxv4bf16.v4bf16(<vscale x 4 x bfloat>, <4 x bfloat>, i64)
 declare <vscale x 2 x bfloat> @llvm.experimental.vector.insert.nxv2bf16.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i64)
 
+declare <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1>, <8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1>, <16 x i1>, i64)
+declare <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1>, <32 x i1>, i64)
 declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64)
 declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv8i1(<vscale x 16 x i1>, <vscale x 8 x i1>, i64)
+declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1>, <64 x i1>, i64)