[llvm] 4c50112 - [AArch64] Add patterns for 64bit vector addp
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 19 00:50:48 PDT 2024
Author: David Green
Date: 2024-09-19T08:50:43+01:00
New Revision: 4c50112ba1fb6b3847decebd6f1e374c61950be9
URL: https://github.com/llvm/llvm-project/commit/4c50112ba1fb6b3847decebd6f1e374c61950be9
DIFF: https://github.com/llvm/llvm-project/commit/4c50112ba1fb6b3847decebd6f1e374c61950be9.diff
LOG: [AArch64] Add patterns for 64bit vector addp
This extends the existing patterns for addp to 64bit outputs with a single
input. Whilst the general pattern is similar to the 128bit patterns
(add(uzp1(extract_lo, extract_hi), uzp2(extract_lo, extract_hi))), at the late
stage other optimizations have happened to turn the first uzp1 into trunc and
the second into extract(uzp2) with undef.
Fixes #109108
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/addp-shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a47de9a12caca5..c040ef1862f21a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9634,6 +9634,18 @@ def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
(AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
(v16i8 (ADDPv16i8 $Rn, $Rm))>;
+def : Pat<(v2i32 (add (AArch64zip1 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
+ (extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
+ (AArch64zip2 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
+ (extract_subvector (v4i32 FPR128:$Rn), (i64 2))))),
+ (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub)>;
+def : Pat<(v4i16 (add (trunc (v4i32 (bitconvert FPR128:$Rn))),
+ (extract_subvector (AArch64uzp2 (v8i16 FPR128:$Rn), undef), (i64 0)))),
+ (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub)>;
+def : Pat<(v8i8 (add (trunc (v8i16 (bitconvert FPR128:$Rn))),
+ (extract_subvector (AArch64uzp2 (v16i8 FPR128:$Rn), undef), (i64 0)))),
+ (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub)>;
+
def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
(AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
(v2f64 (FADDPv2f64 $Rn, $Rm))>;
diff --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
index fb96d11acc275a..54c96820285d32 100644
--- a/llvm/test/CodeGen/AArch64/addp-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -27,10 +27,8 @@ define <4 x i32> @deinterleave_shuffle_v8i32_c(<8 x i32> %a) {
define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
; CHECK-LABEL: deinterleave_shuffle_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: add v0.2s, v2.2s, v0.2s
+; CHECK-NEXT: addp v0.4s, v0.4s, v0.4s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
%r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -49,6 +47,18 @@ define <8 x i16> @deinterleave_shuffle_v16i16(<16 x i16> %a) {
ret <8 x i16> %o
}
+define <4 x i16> @deinterleave_shuffle_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: deinterleave_shuffle_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %r0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %r1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %o = add <4 x i16> %r0, %r1
+ ret <4 x i16> %o
+}
+
define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
; CHECK-LABEL: deinterleave_shuffle_v32i8:
; CHECK: // %bb.0:
@@ -60,6 +70,18 @@ define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
ret <16 x i8> %o
}
+define <8 x i8> @deinterleave_shuffle_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: deinterleave_shuffle_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addp v0.16b, v0.16b, v0.16b
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+ %r0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %r1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %o = add <8 x i8> %r0, %r1
+ ret <8 x i8> %o
+}
+
define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
; CHECK-LABEL: deinterleave_shuffle_v8i64:
; CHECK: // %bb.0:
More information about the llvm-commits
mailing list