[llvm] 8e3dc13 - [AArch64] Concat zip1 and zip2 is a wider zip1
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 18 11:54:34 PST 2023
Author: David Green
Date: 2023-02-18T19:54:29Z
New Revision: 8e3dc1366fb8365c4beb394ef2d34f1c639e8064
URL: https://github.com/llvm/llvm-project/commit/8e3dc1366fb8365c4beb394ef2d34f1c639e8064
DIFF: https://github.com/llvm/llvm-project/commit/8e3dc1366fb8365c4beb394ef2d34f1c639e8064.diff
LOG: [AArch64] Concat zip1 and zip2 is a wider zip1
Given concat(zip1(a, b), zip2(a, b)), we can convert that to a single 128-bit
zip1(a, b), provided we first widen a and b to 128 bits.
Fixes #54226
Differential Revision: https://reviews.llvm.org/D121088
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/arm64-zip.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3a9372e33ad8..687ff6d29bf1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16878,6 +16878,17 @@ static SDValue performConcatVectorsCombine(SDNode *N,
}
}
+ // concat(zip1(a, b), zip2(a, b)) is zip1(a, b)
+ if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
+ N1Opc == AArch64ISD::ZIP2 && N0.getOperand(0) == N1.getOperand(0) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue E0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N0.getOperand(0),
+ DAG.getUNDEF(N0.getValueType()));
+ SDValue E1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N0.getOperand(1),
+ DAG.getUNDEF(N0.getValueType()));
+ return DAG.getNode(AArch64ISD::ZIP1, dl, VT, E0, E1);
+ }
+
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 6d5fe17d9acc..02508c2dfdc6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -142,9 +142,9 @@ define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip2.8b v2, v0, v1
-; CHECK-NEXT: zip1.8b v0, v0, v1
-; CHECK-NEXT: mov.d v0[1], v2[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.16b v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i8> %3
@@ -153,10 +153,9 @@ define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1.8b v2, v0, v1
-; CHECK-NEXT: zip2.8b v0, v0, v1
-; CHECK-NEXT: mov.d v2[1], v0[0]
-; CHECK-NEXT: mov.16b v0, v2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.16b v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -167,9 +166,9 @@ define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip2.4h v2, v0, v1
-; CHECK-NEXT: zip1.4h v0, v0, v1
-; CHECK-NEXT: mov.d v0[1], v2[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i16> %3
@@ -178,10 +177,9 @@ define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine2_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1.4h v2, v0, v1
-; CHECK-NEXT: zip2.4h v0, v0, v1
-; CHECK-NEXT: mov.d v2[1], v0[0]
-; CHECK-NEXT: mov.16b v0, v2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%4 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -192,9 +190,9 @@ define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip2.2s v2, v0, v1
-; CHECK-NEXT: zip1.2s v0, v0, v1
-; CHECK-NEXT: mov.d v0[1], v2[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.4s v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i32> %3
@@ -203,10 +201,9 @@ define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
; CHECK-LABEL: combine2_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1.2s v2, v0, v1
-; CHECK-NEXT: zip2.2s v0, v0, v1
-; CHECK-NEXT: mov.d v2[1], v0[0]
-; CHECK-NEXT: mov.16b v0, v2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.4s v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 0, i32 2>
%4 = shufflevector <2 x i32> %0, <2 x i32> %1, <2 x i32> <i32 1, i32 3>
@@ -217,9 +214,9 @@ define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine_v16i8_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip2.8b v2, v0, v1
-; CHECK-NEXT: zip1.8b v0, v0, v1
-; CHECK-NEXT: mov.d v0[1], v2[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.16b v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i8> %3
@@ -228,10 +225,9 @@ define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
; CHECK-LABEL: combine2_v16i8_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip1.8b v2, v0, v1
-; CHECK-NEXT: zip2.8b v0, v0, v1
-; CHECK-NEXT: mov.d v2[1], v0[0]
-; CHECK-NEXT: mov.16b v0, v2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.16b v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%4 = shufflevector <8 x i8> %0, <8 x i8> %1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -242,9 +238,9 @@ define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
; CHECK-LABEL: combine_v8i16_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: zip2.4h v2, v0, v1
-; CHECK-NEXT: zip1.4h v0, v0, v1
-; CHECK-NEXT: mov.d v0[1], v2[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: zip1.8h v0, v0, v1
; CHECK-NEXT: ret
%3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i16> %3
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
index 4050172efd90..d8b30fd9776e 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
@@ -56,9 +56,7 @@ define <4 x float> @add_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-NEXT: fneg v4.2s, v5.2s
; CHECK-NEXT: fmla v3.2s, v0.2s, v2.2s
; CHECK-NEXT: fmla v4.2s, v1.2s, v2.2s
-; CHECK-NEXT: zip2 v1.2s, v4.2s, v3.2s
-; CHECK-NEXT: zip1 v0.2s, v4.2s, v3.2s
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: zip1 v0.4s, v4.4s, v3.4s
; CHECK-NEXT: ret
entry:
%0 = fsub fast <4 x float> %b, %c
@@ -100,9 +98,7 @@ define <4 x float> @mul_mul270_mul(<4 x float> %a, <4 x float> %b, <4 x float> %
; CHECK-NEXT: fneg v3.2s, v3.2s
; CHECK-NEXT: fmla v2.2s, v7.2s, v1.2s
; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s
-; CHECK-NEXT: zip2 v1.2s, v3.2s, v2.2s
-; CHECK-NEXT: zip1 v0.2s, v3.2s, v2.2s
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: zip1 v0.4s, v3.4s, v2.4s
; CHECK-NEXT: ret
entry:
%strided.vec = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
@@ -281,9 +277,7 @@ define <4 x float> @mul_triangle_addmul(<4 x float> %a, <4 x float> %b, <4 x flo
; CHECK-NEXT: fmla v16.2s, v0.2s, v5.2s
; CHECK-NEXT: fsub v0.2s, v7.2s, v16.2s
; CHECK-NEXT: fadd v1.2s, v6.2s, v3.2s
-; CHECK-NEXT: zip2 v2.2s, v0.2s, v1.2s
-; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
%ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
@@ -334,10 +328,8 @@ define <4 x float> @mul_triangle_multiuses(<4 x float> %a, <4 x float> %b, ptr %
; CHECK-NEXT: fmla v5.2s, v4.2s, v1.2s
; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v1.d[1], v2.d[0]
-; CHECK-NEXT: zip2 v4.2s, v3.2s, v5.2s
-; CHECK-NEXT: zip1 v0.2s, v3.2s, v5.2s
+; CHECK-NEXT: zip1 v0.4s, v3.4s, v5.4s
; CHECK-NEXT: str q1, [x0]
-; CHECK-NEXT: mov v0.d[1], v4.d[0]
; CHECK-NEXT: ret
entry:
%strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
index d2ca1cb95ff7..d9a279d1a79e 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
@@ -42,9 +42,7 @@ define <4 x float> @simple_mul_no_contract(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: fmul v4.2s, v0.2s, v5.2s
; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s
; CHECK-NEXT: fsub v0.2s, v3.2s, v4.2s
-; CHECK-NEXT: zip2 v1.2s, v0.2s, v2.2s
-; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s
; CHECK-NEXT: ret
entry:
%strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
@@ -125,9 +123,7 @@ define <4 x float> @simple_add_270_false(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s
; CHECK-NEXT: fadd v1.2s, v1.2s, v4.2s
; CHECK-NEXT: fsub v0.2s, v0.2s, v2.2s
-; CHECK-NEXT: zip2 v2.2s, v1.2s, v0.2s
-; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
entry:
%strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
@@ -169,9 +165,7 @@ define <4 x float> @add_external_use(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s
; CHECK-NEXT: fsub v1.2s, v4.2s, v1.2s
; CHECK-NEXT: fadd v0.2s, v0.2s, v2.2s
-; CHECK-NEXT: zip2 v2.2s, v1.2s, v0.2s
-; CHECK-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
More information about the llvm-commits mailing list