[llvm] e564852 - [AArch64] Add patterns for fadd(uzp1(x,y), uzp2(x, y)) -> faddp.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 7 10:19:36 PDT 2024
Author: David Green
Date: 2024-06-07T18:19:32+01:00
New Revision: e5648525fd041fe577c65cc392c73040e60fed60
URL: https://github.com/llvm/llvm-project/commit/e5648525fd041fe577c65cc392c73040e60fed60
DIFF: https://github.com/llvm/llvm-project/commit/e5648525fd041fe577c65cc392c73040e60fed60.diff
LOG: [AArch64] Add patterns for fadd(uzp1(x,y), uzp2(x, y)) -> faddp.
Similar to f7018ba0eeaad8dc3e1917cfb986fc9689d72e85, this adds patterns that select a
floating-point faddp for an fadd of uzp1/uzp2 shuffles (zip1/zip2 for v2f64).
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/addp-shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 2ed7850404ce0..91e5bc3caa102 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9419,6 +9419,17 @@ def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
(AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
(v16i8 (ADDPv16i8 $Rn, $Rm))>;
+def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
+ (AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
+ (v2f64 (FADDPv2f64 $Rn, $Rm))>;
+def : Pat<(v4f32 (fadd (AArch64uzp1 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)),
+ (AArch64uzp2 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)))),
+ (v4f32 (FADDPv4f32 $Rn, $Rm))>;
+let Predicates = [HasFullFP16] in
+def : Pat<(v8f16 (fadd (AArch64uzp1 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)),
+ (AArch64uzp2 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)))),
+ (v8f16 (FADDPv8f16 $Rn, $Rm))>;
+
// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
diff --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
index a187e7e94c20f..7cc5041d59f49 100644
--- a/llvm/test/CodeGen/AArch64/addp-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -76,9 +76,7 @@ define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
define <4 x float> @deinterleave_shuffle_v8f32(<8 x float> %a) {
; CHECK-LABEL: deinterleave_shuffle_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -89,9 +87,7 @@ define <4 x float> @deinterleave_shuffle_v8f32(<8 x float> %a) {
define <4 x float> @deinterleave_shuffle_v8f32_c(<8 x float> %a) {
; CHECK-LABEL: deinterleave_shuffle_v8f32_c:
; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: uzp2 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: faddp v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
%r0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%r1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -116,9 +112,7 @@ define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) {
;
; CHECK-FP16-LABEL: deinterleave_shuffle_v16f16:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: uzp1 v2.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: uzp2 v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: fadd v0.8h, v2.8h, v0.8h
+; CHECK-FP16-NEXT: faddp v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
%r0 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%r1 = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -129,12 +123,9 @@ define <8 x half> @deinterleave_shuffle_v16f16(<16 x half> %a) {
define <4 x double> @deinterleave_shuffle_v8f64(<8 x double> %a) {
; CHECK-LABEL: deinterleave_shuffle_v8f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 v4.2d, v2.2d, v3.2d
-; CHECK-NEXT: zip1 v5.2d, v0.2d, v1.2d
-; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
-; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: fadd v1.2d, v4.2d, v2.2d
-; CHECK-NEXT: fadd v0.2d, v5.2d, v0.2d
+; CHECK-NEXT: faddp v2.2d, v2.2d, v3.2d
+; CHECK-NEXT: faddp v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: ret
%r0 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%r1 = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
More information about the llvm-commits mailing list