[llvm] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations. (PR #115189)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 10:16:20 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Paul Walker (paulwalker-arm)
<details>
<summary>Changes</summary>
Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed length vectors.
---
Patch is 36.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115189.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+109-20)
- (modified) llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll (+513-28)
``````````diff
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 5cfcc01afd20f3..f542c7a34ad60e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
!eq(VT, nxv8f16): nxv2f16,
!eq(VT, nxv8bf16): nxv2bf16,
true : untyped);
+
+ // The 64-bit vector subreg of VT.
+ ValueType DSub = !cond(
+ !eq(VT, nxv16i8): v8i8,
+ !eq(VT, nxv8i16): v4i16,
+ !eq(VT, nxv4i32): v2i32,
+ !eq(VT, nxv2i64): v1i64,
+ !eq(VT, nxv2f16): v4f16,
+ !eq(VT, nxv4f16): v4f16,
+ !eq(VT, nxv8f16): v4f16,
+ !eq(VT, nxv2f32): v2f32,
+ !eq(VT, nxv4f32): v2f32,
+ !eq(VT, nxv2f64): v1f64,
+ !eq(VT, nxv2bf16): v4bf16,
+ !eq(VT, nxv4bf16): v4bf16,
+ !eq(VT, nxv8bf16): v4bf16,
+ true : untyped);
+
+ // The 128-bit vector subreg of VT.
+ ValueType ZSub = !cond(
+ !eq(VT, nxv16i8): v16i8,
+ !eq(VT, nxv8i16): v8i16,
+ !eq(VT, nxv4i32): v4i32,
+ !eq(VT, nxv2i64): v2i64,
+ !eq(VT, nxv2f16): v8f16,
+ !eq(VT, nxv4f16): v8f16,
+ !eq(VT, nxv8f16): v8f16,
+ !eq(VT, nxv2f32): v4f32,
+ !eq(VT, nxv4f32): v4f32,
+ !eq(VT, nxv2f64): v2f64,
+ !eq(VT, nxv2bf16): v8bf16,
+ !eq(VT, nxv4bf16): v8bf16,
+ !eq(VT, nxv8bf16): v8bf16,
+ true : untyped);
+
+ // The legal scalar used to hold a vector element.
+ ValueType EltAsScalar = !cond(
+ !eq(VT, nxv16i8): i32,
+ !eq(VT, nxv8i16): i32,
+ !eq(VT, nxv4i32): i32,
+ !eq(VT, nxv2i64): i64,
+ !eq(VT, nxv2f16): f16,
+ !eq(VT, nxv4f16): f16,
+ !eq(VT, nxv8f16): f16,
+ !eq(VT, nxv2f32): f32,
+ !eq(VT, nxv4f32): f32,
+ !eq(VT, nxv2f64): f64,
+ !eq(VT, nxv2bf16): bf16,
+ !eq(VT, nxv4bf16): bf16,
+ !eq(VT, nxv8bf16): bf16,
+ true : untyped);
}
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,67 @@ multiclass sve_int_perm_dup_i<string asm> {
def : InstAlias<"mov $Zd, $Qn",
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
- // Duplicate extracted element of vector into all vector elements
+ // Duplicate an extracted vector element across a vector.
+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
(!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
- def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>;
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>;
+
+ foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
+ }
+
+  foreach VT = [nxv4i32, nxv2f32, nxv4f32] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
+ }
+
+ foreach VT = [nxv2i64, nxv2f64] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
+ }
+
+ // When extracting from an unpacked vector the index must be scaled to account
+ // for the "holes" in the underlying packed vector type. We get the scaling
+ // for free by "promoting" the element type to one whose underlying vector type
+ // is packed.
+
+ foreach VT = [nxv2f16, nxv4f16, nxv8f16] in {
+ def : Pat<(VT (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
+ def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ foreach VT = [nxv2f32, nxv4f32] in {
+ def : Pat<(VT (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ // Duplicate an indexed 128-bit segment across a vector.
def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
(!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index 8c9661730f1f94..0cf8aec52fe258 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
-define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
-; CHECK-LABEL: dup_extract_i8:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, z0.b[1]
; CHECK-NEXT: ret
@@ -12,8 +12,32 @@ define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
ret <vscale x 16 x i8> %.splat
}
-define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
-; CHECK-LABEL: dup_extract_i16:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.b, z0.b[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <16 x i8> %data, i8 1
+ %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+ %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.b, z0.b[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x i8> %data, i8 1
+ %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+ %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
@@ -23,8 +47,32 @@ define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
ret <vscale x 8 x i16> %.splat
}
-define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
-; CHECK-LABEL: dup_extract_i32:
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x i16> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+ %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x i16> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+ %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
@@ -34,8 +82,32 @@ define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
ret <vscale x 4 x i32> %.splat
}
-define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
-; CHECK-LABEL: dup_extract_i64:
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x i32> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+ %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x i32> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+ %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: ret
@@ -45,8 +117,31 @@ define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
ret <vscale x 2 x i64> %.splat
}
-define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
-; CHECK-LABEL: dup_extract_f16:
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x i64> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+ %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: ret
+ %1 = extractelement <1 x i64> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+ %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
@@ -56,19 +151,133 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
ret <vscale x 8 x half> %.splat
}
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
%.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
ret <vscale x 4 x half> %.splat
}
-define <vscale x 2 x half> @dup...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/115189
More information about the llvm-commits
mailing list