[llvm] [AArch64] Add tablegen patterns for concat(extract-high, extract-high) (PR #118286)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 13:42:22 PST 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/118286
>From b5d8b9fa497a096106be9c165dd45b7e2986910b Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 3 Dec 2024 21:42:05 +0000
Subject: [PATCH] [AArch64] Add tablegen patterns for concat(extract-high,
extract-high)
A `concat(extract-high(x), extract-high(y))` is equivalent to inserting the top
half of x into the bottom half of y. This patch adds a tablegen pattern so that
we generate a single i64 lane insert for it.
---
.../lib/Target/AArch64/AArch64InstrFormats.td | 4 +
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27 +++---
llvm/test/CodeGen/AArch64/concat-vector.ll | 82 ++++++-------------
llvm/test/CodeGen/AArch64/vecreduce-add.ll | 3 +-
4 files changed, 46 insertions(+), 70 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index a8ba89f784c8cd..56ff7b0d3a280d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -145,8 +145,12 @@ def gi_extract_high_v4i32 :
def extract_high_v8f16 :
ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v8bf16 :
+ ComplexPattern<v4bf16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4f32 :
ComplexPattern<v2f32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v2f64 :
+ ComplexPattern<v1f64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def gi_extract_high_v8f16 :
GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7614f6215b803c..d015cc15581ad0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7352,7 +7352,8 @@ def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which had just as well be
// INS.
-multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
+multiclass ConcatPat<ValueType DstTy, ValueType SrcTy,
+ ComplexPattern ExtractHigh> {
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
(INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
@@ -7365,16 +7366,22 @@ multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
// If the high lanes are undef we can just ignore them:
def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
-}
-defm : ConcatPat<v2i64, v1i64>;
-defm : ConcatPat<v2f64, v1f64>;
-defm : ConcatPat<v4i32, v2i32>;
-defm : ConcatPat<v4f32, v2f32>;
-defm : ConcatPat<v8i16, v4i16>;
-defm : ConcatPat<v8f16, v4f16>;
-defm : ConcatPat<v8bf16, v4bf16>;
-defm : ConcatPat<v16i8, v8i8>;
+ // Concatenating the high halves of two vectors is an insert of the high
+ // half of the first into the low half of the second.
+ def : Pat<(DstTy (concat_vectors (ExtractHigh (DstTy V128:$Rn)),
+ (ExtractHigh (DstTy V128:$Rm)))),
+ (INSvi64lane V128:$Rm, (i64 0), V128:$Rn, (i64 1))>;
+}
+
+defm : ConcatPat<v2i64, v1i64, extract_high_v2i64>;
+defm : ConcatPat<v2f64, v1f64, extract_high_v2f64>;
+defm : ConcatPat<v4i32, v2i32, extract_high_v4i32>;
+defm : ConcatPat<v4f32, v2f32, extract_high_v4f32>;
+defm : ConcatPat<v8i16, v4i16, extract_high_v8i16>;
+defm : ConcatPat<v8f16, v4f16, extract_high_v8f16>;
+defm : ConcatPat<v8bf16, v4bf16, extract_high_v8bf16>;
+defm : ConcatPat<v16i8, v8i8, extract_high_v16i8>;
//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 36583b89ce5fca..0daa6e7f16202a 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -385,19 +385,11 @@ entry:
}
define <8 x i16> @concat_high_high_v8i16(<8 x i16> %a_vec, <8 x i16> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v8i16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v8i16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v8i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x i16> %a_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x i16> %b_vec, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -406,19 +398,11 @@ entry:
}
define <8 x half> @concat_high_high_v8f16(<8 x half> %a_vec, <8 x half> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v8f16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v8f16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v8f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x half> %a_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x half> %b_vec, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -427,19 +411,11 @@ entry:
}
define <8 x bfloat> @concat_high_high_v8bf16(<8 x bfloat> %a_vec, <8 x bfloat> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v8bf16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v8bf16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v8bf16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <8 x bfloat> %a_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%shuffle.i = shufflevector <8 x bfloat> %b_vec, <8 x bfloat> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -455,9 +431,8 @@ define <4 x i32> @concat_high_high_v4i32(<4 x i32> %a_vec, <4 x i32> %b_vec) {
;
; CHECK-GI-LABEL: concat_high_high_v4i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%shuffle.i3 = shufflevector <4 x i32> %a_vec, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
@@ -474,9 +449,8 @@ define <4 x float> @concat_high_high_v4f32(<4 x float> %a_vec, <4 x float> %b_ve
;
; CHECK-GI-LABEL: concat_high_high_v4f32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%shuffle.i3 = shufflevector <4 x float> %a_vec, <4 x float> poison, <2 x i32> <i32 2, i32 3>
@@ -486,19 +460,11 @@ entry:
}
define <16 x i8> @concat_high_high_v16i8(<16 x i8> %a_vec, <16 x i8> %b_vec) {
-; CHECK-SD-LABEL: concat_high_high_v16i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: concat_high_high_v16i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d0, v0.d[1]
-; CHECK-GI-NEXT: mov d1, v1.d[1]
-; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: concat_high_high_v16i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v1.d[0], v0.d[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
entry:
%shuffle.i3 = shufflevector <16 x i8> %a_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%shuffle.i = shufflevector <16 x i8> %b_vec, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 184aa0226fe774..8473f45f6c803b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -4885,8 +4885,7 @@ entry:
define i32 @extract_hi_hi(<8 x i16> %a) {
; CHECK-SD-LABEL: extract_hi_hi:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: mov v0.d[1], v0.d[0]
+; CHECK-SD-NEXT: mov v0.d[0], v0.d[1]
; CHECK-SD-NEXT: uaddlv s0, v0.8h
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: ret
More information about the llvm-commits
mailing list