[llvm] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to cover all combinations. (PR #115189)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 10:15:45 PST 2024
https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/115189
Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed-length vectors.
From fc0443c616bdb2af07c78c075509465bf2ee7476 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Tue, 5 Nov 2024 15:18:09 +0000
Subject: [PATCH 1/2] [NFC] Increase test coverage for SVE
dup(extract_elt(v,i)) isel patterns.
---
.../AArch64/aarch64-dup-extract-scalable.ll | 570 +++++++++++++++++-
1 file changed, 541 insertions(+), 29 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index 8c9661730f1f94..f15dde2b327e18 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve | FileCheck %s
-define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
-; CHECK-LABEL: dup_extract_i8:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.b, z0.b[1]
; CHECK-NEXT: ret
@@ -12,8 +12,33 @@ define <vscale x 16 x i8> @dup_extract_i8(<vscale x 16 x i8> %data) {
ret <vscale x 16 x i8> %.splat
}
-define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
-; CHECK-LABEL: dup_extract_i16:
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w8, v0.b[1]
+; CHECK-NEXT: mov z0.b, w8
+; CHECK-NEXT: ret
+ %1 = extractelement <16 x i8> %data, i8 1
+ %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+ %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
+; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w8, v0.b[1]
+; CHECK-NEXT: mov z0.b, w8
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x i8> %data, i8 1
+ %.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
+ %.splat = shufflevector <vscale x 16 x i8> %.splatinsert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+ ret <vscale x 16 x i8> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
@@ -23,8 +48,33 @@ define <vscale x 8 x i16> @dup_extract_i16(<vscale x 8 x i16> %data) {
ret <vscale x 8 x i16> %.splat
}
-define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
-; CHECK-LABEL: dup_extract_i32:
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w8, v0.h[1]
+; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x i16> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+ %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
+; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w8, v0.h[1]
+; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x i16> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
+ %.splat = shufflevector <vscale x 8 x i16> %.splatinsert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x i16> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
@@ -34,8 +84,33 @@ define <vscale x 4 x i32> @dup_extract_i32(<vscale x 4 x i32> %data) {
ret <vscale x 4 x i32> %.splat
}
-define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
-; CHECK-LABEL: dup_extract_i64:
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x i32> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+ %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
+; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov w8, v0.s[1]
+; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x i32> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
+ %.splat = shufflevector <vscale x 4 x i32> %.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x i32> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: ret
@@ -45,8 +120,31 @@ define <vscale x 2 x i64> @dup_extract_i64(<vscale x 2 x i64> %data) {
ret <vscale x 2 x i64> %.splat
}
-define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
-; CHECK-LABEL: dup_extract_f16:
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, v0.d[1]
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x i64> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+ %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 2 x i64> @dup_extract_nxv2i64_v1i64(<1 x i64> %data) {
+; CHECK-LABEL: dup_extract_nxv2i64_v1i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: ret
+ %1 = extractelement <1 x i64> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
+ %.splat = shufflevector <vscale x 2 x i64> %.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x i64> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
@@ -56,8 +154,69 @@ define <vscale x 8 x half> @dup_extract_f16(<vscale x 8 x half> %data) {
ret <vscale x 8 x half> %.splat
}
-define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_4:
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 8 x half> %.splatinsert, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
@@ -67,30 +226,105 @@ define <vscale x 4 x half> @dup_extract_f16_4(<vscale x 4 x half> %data) {
ret <vscale x 4 x half> %.splat
}
-define <vscale x 2 x half> @dup_extract_f16_2(<vscale x 2 x half> %data) {
-; CHECK-LABEL: dup_extract_f16_2:
+define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 4 x half> %.splatinsert, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x half> %.splat
+}
+
+define <vscale x 2 x half> @dup_extract_nxv2f16_nxv8f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv2f16_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
%.splat = shufflevector <vscale x 2 x half> %.splatinsert, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
ret <vscale x 2 x half> %.splat
}
-define <vscale x 8 x bfloat> @dup_extract_bf16(<vscale x 8 x bfloat> %data) #0 {
-; CHECK-LABEL: dup_extract_bf16:
+define <vscale x 2 x half> @dup_extract_nxv2f16_nxv4f16(<vscale x 4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv2f16_nxv4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
- %1 = extractelement <vscale x 8 x bfloat> %data, i16 1
- %.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
- %.splat = shufflevector <vscale x 8 x bfloat> %.splatinsert, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
- ret <vscale x 8 x bfloat> %.splat
+ %1 = extractelement <vscale x 4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 2 x half> %.splatinsert, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x half> %.splat
+}
+
+define <vscale x 2 x half> @dup_extract_nxv2f16_nxv2f16(<vscale x 2 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv2f16_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 2 x half> %.splatinsert, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x half> %.splat
+}
+
+define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv2f16_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 2 x half> %.splatinsert, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x half> %.splat
+}
+
+define <vscale x 2 x half> @dup_extract_nxv2f16_v4f16(<4 x half> %data) {
+; CHECK-LABEL: dup_extract_nxv2f16_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x half> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
+ %.splat = shufflevector <vscale x 2 x half> %.splatinsert, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x half> %.splat
}
-define <vscale x 4 x float> @dup_extract_f32(<vscale x 4 x float> %data) {
-; CHECK-LABEL: dup_extract_f32:
+define <vscale x 4 x float> @dup_extract_nxv4f32_nxv4f32(<vscale x 4 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv4f32_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
@@ -100,19 +334,93 @@ define <vscale x 4 x float> @dup_extract_f32(<vscale x 4 x float> %data) {
ret <vscale x 4 x float> %.splat
}
-define <vscale x 2 x float> @dup_extract_f32_2(<vscale x 2 x float> %data) {
-; CHECK-LABEL: dup_extract_f32_2:
+define <vscale x 4 x float> @dup_extract_nxv4f32_nxv2f32(<vscale x 2 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv4f32_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x float> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
+ %.splat = shufflevector <vscale x 4 x float> %.splatinsert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x float> %.splat
+}
+
+define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv4f32_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x float> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
+ %.splat = shufflevector <vscale x 4 x float> %.splatinsert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x float> %.splat
+}
+
+define <vscale x 4 x float> @dup_extract_nxv4f32_v2f32(<2 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv4f32_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x float> %data, i32 1
+ %.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
+ %.splat = shufflevector <vscale x 4 x float> %.splatinsert, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x float> %.splat
+}
+
+define <vscale x 2 x float> @dup_extract_nxv2f32_nxv4f32(<vscale x 4 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv2f32_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
%.splat = shufflevector <vscale x 2 x float> %.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
ret <vscale x 2 x float> %.splat
}
-define <vscale x 2 x double> @dup_extract_f64(<vscale x 2 x double> %data) {
-; CHECK-LABEL: dup_extract_f64:
+define <vscale x 2 x float> @dup_extract_nxv2f32_nxv2f32(<vscale x 2 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv2f32_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x float> %data, i32 1
+ %.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
+ %.splat = shufflevector <vscale x 2 x float> %.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x float> %.splat
+}
+
+define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv2f32_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x float> %data, i32 1
+ %.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
+ %.splat = shufflevector <vscale x 2 x float> %.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x float> %.splat
+}
+
+define <vscale x 2 x float> @dup_extract_nxv2f32_v2f32(<2 x float> %data) {
+; CHECK-LABEL: dup_extract_nxv2f32_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x float> %data, i32 1
+ %.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
+ %.splat = shufflevector <vscale x 2 x float> %.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x float> %.splat
+}
+
+define <vscale x 2 x double> @dup_extract_nxv2f64_nxv2f64(<vscale x 2 x double> %data) {
+; CHECK-LABEL: dup_extract_nxv2f64_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: ret
@@ -122,5 +430,209 @@ define <vscale x 2 x double> @dup_extract_f64(<vscale x 2 x double> %data) {
ret <vscale x 2 x double> %.splat
}
-; +bf16 is required for the bfloat version.
-attributes #0 = { "target-features"="+sve,+bf16" }
+define <vscale x 2 x double> @dup_extract_nxv2f64_v2f64(<2 x double> %data) {
+; CHECK-LABEL: dup_extract_nxv2f64_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: mov z0.d, d0
+; CHECK-NEXT: ret
+ %1 = extractelement <2 x double> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x double> poison, double %1, i32 0
+ %.splat = shufflevector <vscale x 2 x double> %.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x double> %.splat
+}
+
+define <vscale x 2 x double> @dup_extract_nxv2f64_v1f64(<1 x double> %data) {
+; CHECK-LABEL: dup_extract_nxv2f64_v1f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, d0
+; CHECK-NEXT: ret
+ %1 = extractelement <1 x double> %data, i64 1
+ %.splatinsert = insertelement <vscale x 2 x double> poison, double %1, i32 0
+ %.splat = shufflevector <vscale x 2 x double> %.splatinsert, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x double> %.splat
+}
+
+define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv8bf16_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 8 x bfloat> %.splatinsert, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x bfloat> %.splat
+}
+
+define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv8bf16_nxv4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 8 x bfloat> %.splatinsert, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x bfloat> %.splat
+}
+
+define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv2bf16(<vscale x 2 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv8bf16_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 8 x bfloat> %.splatinsert, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x bfloat> %.splat
+}
+
+define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv8bf16_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 8 x bfloat> %.splatinsert, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x bfloat> %.splat
+}
+
+define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v4bf16(<4 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv8bf16_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 8 x bfloat> %.splatinsert, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
+ ret <vscale x 8 x bfloat> %.splat
+}
+
+define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv4bf16_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 4 x bfloat> %.splatinsert, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x bfloat> %.splat
+}
+
+define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv4bf16_nxv4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 4 x bfloat> %.splatinsert, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x bfloat> %.splat
+}
+
+define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv4bf16_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 4 x bfloat> %.splatinsert, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x bfloat> %.splat
+}
+
+define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv4bf16_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 4 x bfloat> %.splatinsert, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x bfloat> %.splat
+}
+
+define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v4bf16(<4 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv4bf16_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 4 x bfloat> %.splatinsert, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
+ ret <vscale x 4 x bfloat> %.splat
+}
+
+define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv8bf16(<vscale x 8 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv2bf16_nxv8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 8 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 2 x bfloat> %.splatinsert, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x bfloat> %.splat
+}
+
+define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv2bf16_nxv4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 4 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 2 x bfloat> %.splatinsert, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x bfloat> %.splat
+}
+
+define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv2bf16_nxv2bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <vscale x 2 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 2 x bfloat> %.splatinsert, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x bfloat> %.splat
+}
+
+define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv2bf16_v8bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <8 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 2 x bfloat> %.splatinsert, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x bfloat> %.splat
+}
+
+define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v4bf16(<4 x bfloat> %data) {
+; CHECK-LABEL: dup_extract_nxv2bf16_v4bf16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: ret
+ %1 = extractelement <4 x bfloat> %data, i16 1
+ %.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
+ %.splat = shufflevector <vscale x 2 x bfloat> %.splatinsert, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
+ ret <vscale x 2 x bfloat> %.splat
+}
+
+attributes #0 = { "target-features"="+sve" }
From 51a2974bb6f1e086fa3897dce4cb80790aa8281e Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Tue, 5 Nov 2024 15:18:38 +0000
Subject: [PATCH 2/2] [LLVM][SVE] Extend dup(extract_elt(v,i)) isel patterns to
cover all combinations.
Adds missing bfloat patterns for unpacked scalable vectors.
Adds patterns for splatting extracts from fixed-length vectors.
---
llvm/lib/Target/AArch64/SVEInstrFormats.td | 129 +++++++++++++++---
.../AArch64/aarch64-dup-extract-scalable.ll | 123 +++++++----------
2 files changed, 157 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 5cfcc01afd20f3..f542c7a34ad60e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -59,6 +59,57 @@ class SVEType<ValueType VT> {
!eq(VT, nxv8f16): nxv2f16,
!eq(VT, nxv8bf16): nxv2bf16,
true : untyped);
+
+ // The 64-bit vector subreg of VT.
+ ValueType DSub = !cond(
+ !eq(VT, nxv16i8): v8i8,
+ !eq(VT, nxv8i16): v4i16,
+ !eq(VT, nxv4i32): v2i32,
+ !eq(VT, nxv2i64): v1i64,
+ !eq(VT, nxv2f16): v4f16,
+ !eq(VT, nxv4f16): v4f16,
+ !eq(VT, nxv8f16): v4f16,
+ !eq(VT, nxv2f32): v2f32,
+ !eq(VT, nxv4f32): v2f32,
+ !eq(VT, nxv2f64): v1f64,
+ !eq(VT, nxv2bf16): v4bf16,
+ !eq(VT, nxv4bf16): v4bf16,
+ !eq(VT, nxv8bf16): v4bf16,
+ true : untyped);
+
+ // The 128-bit vector subreg of VT.
+ ValueType ZSub = !cond(
+ !eq(VT, nxv16i8): v16i8,
+ !eq(VT, nxv8i16): v8i16,
+ !eq(VT, nxv4i32): v4i32,
+ !eq(VT, nxv2i64): v2i64,
+ !eq(VT, nxv2f16): v8f16,
+ !eq(VT, nxv4f16): v8f16,
+ !eq(VT, nxv8f16): v8f16,
+ !eq(VT, nxv2f32): v4f32,
+ !eq(VT, nxv4f32): v4f32,
+ !eq(VT, nxv2f64): v2f64,
+ !eq(VT, nxv2bf16): v8bf16,
+ !eq(VT, nxv4bf16): v8bf16,
+ !eq(VT, nxv8bf16): v8bf16,
+ true : untyped);
+
+ // The legal scalar used to hold a vector element.
+ ValueType EltAsScalar = !cond(
+ !eq(VT, nxv16i8): i32,
+ !eq(VT, nxv8i16): i32,
+ !eq(VT, nxv4i32): i32,
+ !eq(VT, nxv2i64): i64,
+ !eq(VT, nxv2f16): f16,
+ !eq(VT, nxv4f16): f16,
+ !eq(VT, nxv8f16): f16,
+ !eq(VT, nxv2f32): f32,
+ !eq(VT, nxv4f32): f32,
+ !eq(VT, nxv2f64): f64,
+ !eq(VT, nxv2bf16): bf16,
+ !eq(VT, nxv4bf16): bf16,
+ !eq(VT, nxv8bf16): bf16,
+ true : untyped);
}
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
@@ -1402,29 +1453,67 @@ multiclass sve_int_perm_dup_i<string asm> {
def : InstAlias<"mov $Zd, $Qn",
(!cast<Instruction>(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>;
- // Duplicate extracted element of vector into all vector elements
+ // Duplicate an extracted vector element across a vector.
+
def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))),
(!cast<Instruction>(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>;
- def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
- (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
- def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
- (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
- def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
- (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
- def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v16i8 V128:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_b:$index)>;
+ def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (v8i8 V64:$vec), sve_elm_idx_extdup_b:$index)))),
+ (!cast<Instruction>(NAME # _B) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_b:$index)>;
+
+ foreach VT = [nxv8i16, nxv2f16, nxv4f16, nxv8f16, nxv2bf16, nxv4bf16, nxv8bf16] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_h:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_h:$index)))),
+ (!cast<Instruction>(NAME # _H) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_h:$index)>;
+ }
+
+ foreach VT = [nxv4i32, nxv2f32, nxv4f32 ] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.Packed ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_s:$index)>;
+ }
+
+ foreach VT = [nxv2i64, nxv2f64] in {
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (VT ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.ZSub V128:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, zsub), sve_elm_idx_extdup_d:$index)>;
+ def : Pat<(VT (splat_vector (SVEType<VT>.EltAsScalar (vector_extract (SVEType<VT>.DSub V64:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) (SUBREG_TO_REG (i64 0), $vec, dsub), sve_elm_idx_extdup_d:$index)>;
+ }
+
+ // When extracting from an unpacked vector the index must be scaled to account
+ // for the "holes" in the underlying packed vector type. We get the scaling for
+ // free by "promoting" the element type to one whose vector type is packed.
+ // NOTE(review): confirm every lane is written when VT has more elements than $vec.
+
+ foreach VT = [nxv2f16, nxv4f16, nxv8f16] in {
+ def : Pat<(VT (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
+ def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))),
+ (!cast<Instruction>(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>;
+ def : Pat<(VT (splat_vector (bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
+ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ foreach VT = [nxv2f32, nxv4f32] in {
+ def : Pat<(VT (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))),
(!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+ }
+
+ // Duplicate an indexed 128-bit segment across a vector.
def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)),
(!cast<Instruction>(NAME # _Q) $Op1, $imm)>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index f15dde2b327e18..0cf8aec52fe258 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -15,8 +15,8 @@ define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data)
define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: mov z0.b, w8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.b, z0.b[1]
; CHECK-NEXT: ret
%1 = extractelement <16 x i8> %data, i8 1
%.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
@@ -27,9 +27,8 @@ define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: mov z0.b, w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.b, z0.b[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x i8> %data, i8 1
%.splatinsert = insertelement <vscale x 16 x i8> poison, i8 %1, i32 0
@@ -51,8 +50,8 @@ define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data)
define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x i16> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
@@ -63,9 +62,8 @@ define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x i16> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x i16> poison, i16 %1, i32 0
@@ -87,8 +85,8 @@ define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data)
define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x i32> %data, i32 1
%.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
@@ -99,9 +97,8 @@ define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <2 x i32> %data, i32 1
%.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %1, i32 0
@@ -123,8 +120,8 @@ define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data)
define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, v0.d[1]
-; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: ret
%1 = extractelement <2 x i64> %data, i64 1
%.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %1, i32 0
@@ -158,7 +155,6 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv4f16(<vscale x 4 x half> %dat
; CHECK-LABEL: dup_extract_nxv8f16_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
@@ -170,7 +166,6 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %dat
; CHECK-LABEL: dup_extract_nxv8f16_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
@@ -181,8 +176,8 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %dat
define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
@@ -193,9 +188,8 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x half> poison, half %1, i32 0
@@ -207,7 +201,6 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_nxv8f16(<vscale x 8 x half> %dat
; CHECK-LABEL: dup_extract_nxv4f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 8 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
@@ -230,7 +223,6 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %dat
; CHECK-LABEL: dup_extract_nxv4f16_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
@@ -241,8 +233,8 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %dat
define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
@@ -253,9 +245,8 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x half> poison, half %1, i32 0
@@ -267,7 +258,6 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_nxv8f16(<vscale x 8 x half> %dat
; CHECK-LABEL: dup_extract_nxv2f16_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 8 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
@@ -279,7 +269,6 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_nxv4f16(<vscale x 4 x half> %dat
; CHECK-LABEL: dup_extract_nxv2f16_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
@@ -301,8 +290,8 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_nxv2f16(<vscale x 2 x half> %dat
define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
; CHECK-LABEL: dup_extract_nxv2f16_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
@@ -313,9 +302,8 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
define <vscale x 2 x half> @dup_extract_nxv2f16_v4f16(<4 x half> %data) {
; CHECK-LABEL: dup_extract_nxv2f16_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x half> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x half> poison, half %1, i32 0
@@ -338,7 +326,6 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_nxv2f32(<vscale x 2 x float> %d
; CHECK-LABEL: dup_extract_nxv4f32_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
@@ -349,8 +336,8 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_nxv2f32(<vscale x 2 x float> %d
define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
; CHECK-LABEL: dup_extract_nxv4f32_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
@@ -361,9 +348,8 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
define <vscale x 4 x float> @dup_extract_nxv4f32_v2f32(<2 x float> %data) {
; CHECK-LABEL: dup_extract_nxv4f32_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <2 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 4 x float> poison, float %1, i32 0
@@ -375,7 +361,6 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_nxv4f32(<vscale x 4 x float> %d
; CHECK-LABEL: dup_extract_nxv2f32_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
@@ -397,8 +382,8 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_nxv2f32(<vscale x 2 x float> %d
define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
; CHECK-LABEL: dup_extract_nxv2f32_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
@@ -409,9 +394,8 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
define <vscale x 2 x float> @dup_extract_nxv2f32_v2f32(<2 x float> %data) {
; CHECK-LABEL: dup_extract_nxv2f32_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ret
%1 = extractelement <2 x float> %data, i32 1
%.splatinsert = insertelement <vscale x 2 x float> poison, float %1, i32 0
@@ -433,8 +417,8 @@ define <vscale x 2 x double> @dup_extract_nxv2f64_nxv2f64(<vscale x 2 x double>
define <vscale x 2 x double> @dup_extract_nxv2f64_v2f64(<2 x double> %data) {
; CHECK-LABEL: dup_extract_nxv2f64_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov d0, v0.d[1]
-; CHECK-NEXT: mov z0.d, d0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: ret
%1 = extractelement <2 x double> %data, i64 1
%.splatinsert = insertelement <vscale x 2 x double> poison, double %1, i32 0
@@ -468,7 +452,6 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat
; CHECK-LABEL: dup_extract_nxv8bf16_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
@@ -480,7 +463,6 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv2bf16(<vscale x 2 x bfloat
; CHECK-LABEL: dup_extract_nxv8bf16_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
@@ -491,8 +473,8 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv2bf16(<vscale x 2 x bfloat
define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
; CHECK-LABEL: dup_extract_nxv8bf16_v8bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
@@ -503,9 +485,8 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v4bf16(<4 x bfloat> %data) {
; CHECK-LABEL: dup_extract_nxv8bf16_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 8 x bfloat> poison, bfloat %1, i32 0
@@ -517,7 +498,6 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat
; CHECK-LABEL: dup_extract_nxv4bf16_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 8 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
@@ -529,7 +509,6 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat
; CHECK-LABEL: dup_extract_nxv4bf16_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
@@ -541,7 +520,6 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat
; CHECK-LABEL: dup_extract_nxv4bf16_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
@@ -552,8 +530,8 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat
define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
; CHECK-LABEL: dup_extract_nxv4bf16_v8bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
@@ -564,9 +542,8 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v4bf16(<4 x bfloat> %data) {
; CHECK-LABEL: dup_extract_nxv4bf16_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 4 x bfloat> poison, bfloat %1, i32 0
@@ -578,7 +555,6 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv8bf16(<vscale x 8 x bfloat
; CHECK-LABEL: dup_extract_nxv2bf16_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 8 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
@@ -590,7 +566,6 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat
; CHECK-LABEL: dup_extract_nxv2bf16_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 4 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
@@ -602,7 +577,6 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat
; CHECK-LABEL: dup_extract_nxv2bf16_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: ret
%1 = extractelement <vscale x 2 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
@@ -613,8 +587,8 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat
define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
; CHECK-LABEL: dup_extract_nxv2bf16_v8bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <8 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
@@ -625,9 +599,8 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v4bf16(<4 x bfloat> %data) {
; CHECK-LABEL: dup_extract_nxv2bf16_v4bf16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: ret
%1 = extractelement <4 x bfloat> %data, i16 1
%.splatinsert = insertelement <vscale x 2 x bfloat> poison, bfloat %1, i32 0
More information about the llvm-commits
mailing list