[llvm] 4c50112 - [AArch64] Add patterns for 64bit vector addp

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 00:50:48 PDT 2024


Author: David Green
Date: 2024-09-19T08:50:43+01:00
New Revision: 4c50112ba1fb6b3847decebd6f1e374c61950be9

URL: https://github.com/llvm/llvm-project/commit/4c50112ba1fb6b3847decebd6f1e374c61950be9
DIFF: https://github.com/llvm/llvm-project/commit/4c50112ba1fb6b3847decebd6f1e374c61950be9.diff

LOG: [AArch64] Add patterns for 64bit vector addp

This extends the existing patterns for addp to 64bit outputs with a single
input. Whilst the general pattern is similar to the 128bit patterns
(add(uzp1(extract_lo, extract_hi), uzp2(extract_lo, extract_hi))), at the late
stage other optimizations have happened to turn the first uzp1 into trunc and
the second into extract(uzp2) with undef.

Fixes #109108

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/addp-shuffle.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a47de9a12caca5..c040ef1862f21a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9634,6 +9634,18 @@ def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
                       (AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
           (v16i8 (ADDPv16i8 $Rn, $Rm))>;
 
+def : Pat<(v2i32 (add (AArch64zip1 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
+                                   (extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
+                      (AArch64zip2 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
+                                   (extract_subvector (v4i32 FPR128:$Rn), (i64 2))))),
+          (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub)>;
+def : Pat<(v4i16 (add (trunc (v4i32 (bitconvert FPR128:$Rn))),
+                      (extract_subvector (AArch64uzp2 (v8i16 FPR128:$Rn), undef), (i64 0)))),
+          (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub)>;
+def : Pat<(v8i8  (add (trunc (v8i16 (bitconvert FPR128:$Rn))),
+                      (extract_subvector (AArch64uzp2 (v16i8 FPR128:$Rn), undef), (i64 0)))),
+          (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub)>;
+
 def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
                        (AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
           (v2f64 (FADDPv2f64 $Rn, $Rm))>;

diff  --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
index fb96d11acc275a..54c96820285d32 100644
--- a/llvm/test/CodeGen/AArch64/addp-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -27,10 +27,8 @@ define <4 x i32> @deinterleave_shuffle_v8i32_c(<8 x i32> %a) {
 define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    zip1 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    add v0.2s, v2.2s, v0.2s
+; CHECK-NEXT:    addp v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
   %r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -49,6 +47,18 @@ define <8 x i16> @deinterleave_shuffle_v16i16(<16 x i16> %a) {
   ret <8 x i16> %o
 }
 
+define <4 x i16> @deinterleave_shuffle_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: deinterleave_shuffle_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %r0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %r1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %o = add <4 x i16> %r0, %r1
+  ret <4 x i16> %o
+}
+
 define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v32i8:
 ; CHECK:       // %bb.0:
@@ -60,6 +70,18 @@ define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
   ret <16 x i8> %o
 }
 
+define <8 x i8> @deinterleave_shuffle_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: deinterleave_shuffle_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addp v0.16b, v0.16b, v0.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %r0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %r1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %o = add <8 x i8> %r0, %r1
+  ret <8 x i8> %o
+}
+
 define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v8i64:
 ; CHECK:       // %bb.0:


        


More information about the llvm-commits mailing list