[PATCH] D120328: [DAGCombine] insert_subvector undef, (splat X), N2 -> splat X
Paul Walker via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 22 07:23:54 PST 2022
paulwalker-arm created this revision.
Herald added subscribers: ecnelises, hiraditya.
paulwalker-arm requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D120328
Files:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-insert-vector.ll
Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+bf16 < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sve -mattr=+bf16 < %s | FileCheck %s --check-prefixes=CHECK
+
+target triple = "aarch64-unknown-linux-gnu"
define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64:
@@ -571,6 +573,42 @@
ret <vscale x 16 x i1> %v0
}
+; Test constant preicate insert into undef
+define <vscale x 2 x i1> @insert_nxv2i1_v8i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv2i1_v8i1_const_true_into_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ret
+ %v0 = call <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1 (<vscale x 2 x i1> undef, <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+ ret <vscale x 2 x i1> %v0
+}
+
+define <vscale x 4 x i1> @insert_nxv4i1_v16i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv4i1_v16i1_const_true_into_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ret
+ %v0 = call <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1 (<vscale x 4 x i1> undef, <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+ ret <vscale x 4 x i1> %v0
+}
+
+define <vscale x 8 x i1> @insert_nxv8i1_v32i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv8i1_v32i1_const_true_into_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ret
+ %v0 = call <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1 (<vscale x 8 x i1> undef, <32 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+ ret <vscale x 8 x i1> %v0
+}
+
+define <vscale x 16 x i1> @insert_nxv16i1_v64i1_const_true_into_undef() vscale_range(4,4) {
+; CHECK-LABEL: insert_nxv16i1_v64i1_const_true_into_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: ret
+ %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1 (<vscale x 16 x i1> undef, <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, i64 0)
+ ret <vscale x 16 x i1> %v0
+}
declare <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32>, <vscale x 2 x i32>, i64)
declare <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float>, <vscale x 2 x float>, i64)
@@ -583,5 +621,9 @@
declare <vscale x 4 x bfloat> @llvm.experimental.vector.insert.nxv4bf16.v4bf16(<vscale x 4 x bfloat>, <4 x bfloat>, i64)
declare <vscale x 2 x bfloat> @llvm.experimental.vector.insert.nxv2bf16.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.insert.nxv2i1.v8i1(<vscale x 2 x i1>, <8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv4i1.v16i1(<vscale x 4 x i1>, <16 x i1>, i64)
+declare <vscale x 8 x i1> @llvm.experimental.vector.insert.nxv8i1.v32i1(<vscale x 8 x i1>, <32 x i1>, i64)
declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64)
declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv8i1(<vscale x 16 x i1>, <vscale x 8 x i1>, i64)
+declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nxv16i1.v64i1(<vscale x 16 x i1>, <64 x i1>, i64)
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10092,10 +10092,9 @@
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
// We can hande the zero case during isel.
- if (ConstVal->isZero())
- return Op;
- if (ConstVal->isOne())
+ if (ConstVal->getZExtValue() & 0x1)
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
+ return Op;
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22581,6 +22581,11 @@
}
}
+ // Simplify scalar inserts into an undef vector:
+ // insert_subvector undef, (splat X), N2 -> splat X
+ if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
+
// Canonicalize insert_subvector dag nodes.
// Example:
// (insert_subvector (insert_subvector A, Idx0), Idx1)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120328.410532.patch
Type: text/x-patch
Size: 5603 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220222/2e892b6f/attachment.bin>
More information about the llvm-commits
mailing list