[llvm] 16b46dd - [AArch64] More tablegen patterns for addp of two extracts
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 18 23:52:50 PDT 2023
Author: David Green
Date: 2023-06-19T07:52:46+01:00
New Revision: 16b46dde0b51ec6d2ec217111acc4de43fe82aa9
URL: https://github.com/llvm/llvm-project/commit/16b46dde0b51ec6d2ec217111acc4de43fe82aa9
DIFF: https://github.com/llvm/llvm-project/commit/16b46dde0b51ec6d2ec217111acc4de43fe82aa9.diff
LOG: [AArch64] More tablegen patterns for addp of two extracts
Similar to D152245, this adds integer addp patterns, creating the larger
(e.g. v4i32) addp Rn, Rn in place of addp(extractlow(Rn), extracthigh(Rn)).
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 9c1dc83a3e1a1..e6af51629c5a0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8517,8 +8517,18 @@ def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
(vector_extract (v8f16 FPR128:$Rn), (i64 1))),
(f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
-// Prefer using the bottom lanes of faddp Rn, Rn compared to
-// faddp extractlow(Rn), extracthigh(Rn)
+// Prefer using the bottom lanes of addp Rn, Rn compared to
+// addp extractlow(Rn), extracthigh(Rn)
+def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
+ (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
+ (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
+def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
+ (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
+ (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
+def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
+ (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
+ (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;
+
def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
(v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
(v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index eefa5a9b43d0b..4fd40bb7e229d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
-; RUN: llc -global-isel=1 -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>)
@@ -123,3 +123,78 @@ define i32 @test_vaddv.v2i32(<2 x i32> %a) {
}
declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>)
+
+define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-SD-LABEL: addp_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: addp v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: dup v1.2s, v0.s[1]
+; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: addp_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: addp v0.2s, v1.2s, v0.2s
+; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
+; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: fmov w0, s0
+; CHECK-GI-NEXT: ret
+ %1 = add <4 x i32> %a, %b
+ %2 = shufflevector <4 x i32> %1, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+ %3 = shufflevector <4 x i32> %1, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+ %4 = tail call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %2, <2 x i32> %3)
+ %5 = shufflevector <2 x i32> %4, <2 x i32> poison, <2 x i32> <i32 1, i32 poison>
+ %6 = add <2 x i32> %4, %5
+ %7 = extractelement <2 x i32> %6, i64 0
+ ret i32 %7
+}
+
+define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SD-LABEL: addp_v8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: addp v0.8h, v0.8h, v0.8h
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: addp_v8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: addp v0.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT: ret
+ %1 = add <8 x i16> %a, %b
+ %2 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = shufflevector <8 x i16> %1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %4 = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %2, <4 x i16> %3)
+ ret <4 x i16> %4
+}
+
+define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-SD-LABEL: addp_v16i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: addp v0.16b, v0.16b, v0.16b
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: addp_v16i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #0
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: addp v0.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: ret
+ %1 = add <16 x i8> %a, %b
+ %2 = shufflevector <16 x i8> %1, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = shufflevector <16 x i8> %1, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %4 = tail call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %2, <8 x i8> %3)
+ ret <8 x i8> %4
+}
+
More information about the llvm-commits
mailing list