[llvm] [RISCV] Lower vector_splice on zvfhmin/zvfbfmin (PR #112579)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 16 09:50:44 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/112579
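The precommit tests below add llvm.vector.splice coverage for scalable bfloat vectors and rerun the half cases with zvfhmin, so the later lowering patches in this PR have baseline check lines to diff against.

For reference, llvm.vector.splice effectively concatenates its two operands and extracts a subvector of the result type starting at the signed immediate offset; a negative offset counts back from the end of the first operand. A minimal fixed-width sketch of the semantics, illustrative only and not taken from the patch:

; offset -1: last element of %a followed by the first three elements of %b
declare <4 x bfloat> @llvm.vector.splice.v4bf16(<4 x bfloat>, <4 x bfloat>, i32)

define <4 x bfloat> @splice_example(<4 x bfloat> %a, <4 x bfloat> %b) {
  %res = call <4 x bfloat> @llvm.vector.splice.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, i32 -1)
  ret <4 x bfloat> %res
}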
From fe7e653e6306671a8d55bcee4be38adabf217ede Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 16 Oct 2024 17:26:48 +0100
Subject: [PATCH 1/3] Precommit tests
---
llvm/test/Analysis/CostModel/RISCV/splice.ll | 26 +-
llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 2769 ++++++++++++++++--
2 files changed, 2614 insertions(+), 181 deletions(-)
diff --git a/llvm/test/Analysis/CostModel/RISCV/splice.ll b/llvm/test/Analysis/CostModel/RISCV/splice.ll
index 8d7d1576a532da..ddfaa8c13d425f 100644
--- a/llvm/test/Analysis/CostModel/RISCV/splice.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/splice.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh,+zvfbfmin | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefix=SIZE
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefix=SIZE
@@ -34,6 +34,13 @@ define void @vector_splice() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 -1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 -1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 -1)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 -1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
@@ -86,6 +93,13 @@ define void @vector_splice() {
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %splice.nxv16i64 = call <vscale x 16 x i64> @llvm.vector.splice.nxv16i64(<vscale x 16 x i64> zeroinitializer, <vscale x 16 x i64> zeroinitializer, i32 -1)
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 -1)
; SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 -1)
+; SIZE-NEXT: Cost Model: Invalid cost for instruction: %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 -1)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
@@ -141,6 +155,14 @@ define void @vector_splice() {
%splice.nxv32i64 = call <vscale x 32 x i64> @llvm.vector.splice.nxv32i64(<vscale x 32 x i64> zeroinitializer, <vscale x 32 x i64> zeroinitializer, i32 -1)
%splice.nxv64i64 = call <vscale x 64 x i64> @llvm.vector.splice.nxv64i64(<vscale x 64 x i64> zeroinitializer, <vscale x 64 x i64> zeroinitializer, i32 -1)
+ %splice.nxv1bf16 = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> zeroinitializer, <vscale x 1 x bfloat> zeroinitializer, i32 -1)
+ %splice.nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> zeroinitializer, <vscale x 2 x bfloat> zeroinitializer, i32 -1)
+ %splice.nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> zeroinitializer, <vscale x 4 x bfloat> zeroinitializer, i32 -1)
+ %splice.nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x bfloat> zeroinitializer, i32 -1)
+ %splice.nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> zeroinitializer, <vscale x 16 x bfloat> zeroinitializer, i32 -1)
+ %splice.nxv32bf16 = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> zeroinitializer, <vscale x 32 x bfloat> zeroinitializer, i32 -1)
+ %splice.nxv64bf16 = call <vscale x 64 x bfloat> @llvm.vector.splice.nxv64bf16(<vscale x 64 x bfloat> zeroinitializer, <vscale x 64 x bfloat> zeroinitializer, i32 -1)
+
%splice.nxv1f16 = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> zeroinitializer, <vscale x 1 x half> zeroinitializer, i32 -1)
%splice.nxv2f16 = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x half> zeroinitializer, i32 -1)
%splice.nxv4f16 = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x half> zeroinitializer, i32 -1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
index 8cb6fed2f588a7..3f84f4549ce814 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zfh,+zvfh < %s | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zfh,+zvfh < %s | FileCheck %s
+; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN64
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN32
+; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH32
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH64
; Tests assume VLEN=128 or vscale_range_min=2.
@@ -1533,6 +1535,1220 @@ define <vscale x 8 x i64> @splice_nxv8i64_offset_max(<vscale x 8 x i64> %a, <vsc
ret <vscale x 8 x i64> %res
}
+declare <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, i32)
+
+define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_zero(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv1bf16_offset_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 0)
+ ret <vscale x 1 x bfloat> %res
+}
+
+define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_negone(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv1bf16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: addi a0, a0, -2
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 -1)
+ ret <vscale x 1 x bfloat> %res
+}
+
+define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_min(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv1bf16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: li a2, 4
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: bltu a1, a2, .LBB104_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 4
+; CHECK-NEXT: .LBB104_2:
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 -2)
+ ret <vscale x 1 x bfloat> %res
+}
+
+define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_max(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv1bf16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 2
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: srli a1, a1, 3
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: vse16.v v9, (a2)
+; CHECK-NEXT: bltu a1, a3, .LBB105_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: .LBB105_2:
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 1)
+ ret <vscale x 1 x bfloat> %res
+}
+
+declare <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i32)
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_zero(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_offset_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 0)
+ ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_negone(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: addi a0, a0, -2
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -1)
+ ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_min(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: li a2, 8
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: bltu a1, a2, .LBB108_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 8
+; CHECK-NEXT: .LBB108_2:
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -4)
+ ret <vscale x 2 x bfloat> %res
+}
+
+define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_max(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv2bf16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: li a3, 3
+; CHECK-NEXT: vse16.v v9, (a2)
+; CHECK-NEXT: bltu a1, a3, .LBB109_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: .LBB109_2:
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 3)
+ ret <vscale x 2 x bfloat> %res
+}
+
+declare <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i32)
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_zero(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_offset_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 0)
+ ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_negone(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vs1r.v v9, (a0)
+; CHECK-NEXT: addi a0, a0, -2
+; CHECK-NEXT: vl1re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -1)
+ ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_min(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: vs1r.v v9, (a0)
+; CHECK-NEXT: bltu a1, a2, .LBB112_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: .LBB112_2:
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vl1re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -8)
+ ret <vscale x 4 x bfloat> %res
+}
+
+define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_max(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv4bf16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a2, a0, a1
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: li a3, 7
+; CHECK-NEXT: vs1r.v v9, (a2)
+; CHECK-NEXT: bltu a1, a3, .LBB113_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: .LBB113_2:
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vl1re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 7)
+ ret <vscale x 4 x bfloat> %res
+}
+
+declare <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+
+define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_zero(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv8bf16_offset_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 0)
+ ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_negone(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv8bf16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: addi a0, a0, -2
+; CHECK-NEXT: vl2re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 -1)
+ ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_min(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv8bf16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vs2r.v v10, (a0)
+; CHECK-NEXT: bltu a1, a2, .LBB116_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: .LBB116_2:
+; CHECK-NEXT: sub a0, a0, a1
+; CHECK-NEXT: vl2re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 -16)
+ ret <vscale x 8 x bfloat> %res
+}
+
+define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_max(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv8bf16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: li a3, 15
+; CHECK-NEXT: vs2r.v v10, (a2)
+; CHECK-NEXT: bltu a1, a3, .LBB117_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 15
+; CHECK-NEXT: .LBB117_2:
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vl2re16.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 15)
+ ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, i32)
+
+define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_zero(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv16bf16_offset_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 0)
+ ret <vscale x 16 x bfloat> %res
+}
+
+define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_negone(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
+; ZVFHMIN64-LABEL: splice_nxv16bf16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -48
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 48
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 3
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -32
+; ZVFHMIN64-NEXT: addi a0, sp, 32
+; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 2
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -48
+; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 48
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv16bf16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -48
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 48
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 3
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -32
+; ZVFHMIN32-NEXT: addi a0, sp, 32
+; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 2
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -48
+; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 48
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv16bf16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: addi sp, sp, -48
+; ZVFH32-NEXT: .cfi_def_cfa_offset 48
+; ZVFH32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: .cfi_offset ra, -4
+; ZVFH32-NEXT: .cfi_offset s0, -8
+; ZVFH32-NEXT: addi s0, sp, 48
+; ZVFH32-NEXT: .cfi_def_cfa s0, 0
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 3
+; ZVFH32-NEXT: sub sp, sp, a0
+; ZVFH32-NEXT: andi sp, sp, -32
+; ZVFH32-NEXT: addi a0, sp, 32
+; ZVFH32-NEXT: vs4r.v v8, (a0)
+; ZVFH32-NEXT: csrr a1, vlenb
+; ZVFH32-NEXT: slli a1, a1, 2
+; ZVFH32-NEXT: add a0, a0, a1
+; ZVFH32-NEXT: vs4r.v v12, (a0)
+; ZVFH32-NEXT: addi a0, a0, -2
+; ZVFH32-NEXT: vl4re16.v v8, (a0)
+; ZVFH32-NEXT: addi sp, s0, -48
+; ZVFH32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: addi sp, sp, 48
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv16bf16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: addi sp, sp, -48
+; ZVFH64-NEXT: .cfi_def_cfa_offset 48
+; ZVFH64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: .cfi_offset ra, -8
+; ZVFH64-NEXT: .cfi_offset s0, -16
+; ZVFH64-NEXT: addi s0, sp, 48
+; ZVFH64-NEXT: .cfi_def_cfa s0, 0
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 3
+; ZVFH64-NEXT: sub sp, sp, a0
+; ZVFH64-NEXT: andi sp, sp, -32
+; ZVFH64-NEXT: addi a0, sp, 32
+; ZVFH64-NEXT: vs4r.v v8, (a0)
+; ZVFH64-NEXT: csrr a1, vlenb
+; ZVFH64-NEXT: slli a1, a1, 2
+; ZVFH64-NEXT: add a0, a0, a1
+; ZVFH64-NEXT: vs4r.v v12, (a0)
+; ZVFH64-NEXT: addi a0, a0, -2
+; ZVFH64-NEXT: vl4re16.v v8, (a0)
+; ZVFH64-NEXT: addi sp, s0, -48
+; ZVFH64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: addi sp, sp, 48
+; ZVFH64-NEXT: ret
+ %res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 -1)
+ ret <vscale x 16 x bfloat> %res
+}
+
+define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_min(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
+; ZVFHMIN64-LABEL: splice_nxv16bf16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -48
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 48
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 3
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -32
+; ZVFHMIN64-NEXT: addi a0, sp, 32
+; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 2
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 64
+; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB120_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 64
+; ZVFHMIN64-NEXT: .LBB120_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -48
+; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 48
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv16bf16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -48
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 48
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 3
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -32
+; ZVFHMIN32-NEXT: addi a0, sp, 32
+; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 2
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 64
+; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB120_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 64
+; ZVFHMIN32-NEXT: .LBB120_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -48
+; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 48
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv16bf16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: addi sp, sp, -48
+; ZVFH32-NEXT: .cfi_def_cfa_offset 48
+; ZVFH32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: .cfi_offset ra, -4
+; ZVFH32-NEXT: .cfi_offset s0, -8
+; ZVFH32-NEXT: addi s0, sp, 48
+; ZVFH32-NEXT: .cfi_def_cfa s0, 0
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 3
+; ZVFH32-NEXT: sub sp, sp, a0
+; ZVFH32-NEXT: andi sp, sp, -32
+; ZVFH32-NEXT: addi a0, sp, 32
+; ZVFH32-NEXT: vs4r.v v8, (a0)
+; ZVFH32-NEXT: csrr a1, vlenb
+; ZVFH32-NEXT: slli a1, a1, 2
+; ZVFH32-NEXT: add a0, a0, a1
+; ZVFH32-NEXT: li a2, 64
+; ZVFH32-NEXT: vs4r.v v12, (a0)
+; ZVFH32-NEXT: bltu a1, a2, .LBB120_2
+; ZVFH32-NEXT: # %bb.1:
+; ZVFH32-NEXT: li a1, 64
+; ZVFH32-NEXT: .LBB120_2:
+; ZVFH32-NEXT: sub a0, a0, a1
+; ZVFH32-NEXT: vl4re16.v v8, (a0)
+; ZVFH32-NEXT: addi sp, s0, -48
+; ZVFH32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: addi sp, sp, 48
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv16bf16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: addi sp, sp, -48
+; ZVFH64-NEXT: .cfi_def_cfa_offset 48
+; ZVFH64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: .cfi_offset ra, -8
+; ZVFH64-NEXT: .cfi_offset s0, -16
+; ZVFH64-NEXT: addi s0, sp, 48
+; ZVFH64-NEXT: .cfi_def_cfa s0, 0
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 3
+; ZVFH64-NEXT: sub sp, sp, a0
+; ZVFH64-NEXT: andi sp, sp, -32
+; ZVFH64-NEXT: addi a0, sp, 32
+; ZVFH64-NEXT: vs4r.v v8, (a0)
+; ZVFH64-NEXT: csrr a1, vlenb
+; ZVFH64-NEXT: slli a1, a1, 2
+; ZVFH64-NEXT: add a0, a0, a1
+; ZVFH64-NEXT: li a2, 64
+; ZVFH64-NEXT: vs4r.v v12, (a0)
+; ZVFH64-NEXT: bltu a1, a2, .LBB120_2
+; ZVFH64-NEXT: # %bb.1:
+; ZVFH64-NEXT: li a1, 64
+; ZVFH64-NEXT: .LBB120_2:
+; ZVFH64-NEXT: sub a0, a0, a1
+; ZVFH64-NEXT: vl4re16.v v8, (a0)
+; ZVFH64-NEXT: addi sp, s0, -48
+; ZVFH64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: addi sp, sp, 48
+; ZVFH64-NEXT: ret
+ %res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 -32)
+ ret <vscale x 16 x bfloat> %res
+}
+
+define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_max(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
+; ZVFHMIN64-LABEL: splice_nxv16bf16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -48
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 48
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 3
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -32
+; ZVFHMIN64-NEXT: addi a0, sp, 32
+; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a2, a1, 2
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 31
+; ZVFHMIN64-NEXT: vs4r.v v12, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB121_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 31
+; ZVFHMIN64-NEXT: .LBB121_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -48
+; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 48
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv16bf16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -48
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 48
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 3
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -32
+; ZVFHMIN32-NEXT: addi a0, sp, 32
+; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a2, a1, 2
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 31
+; ZVFHMIN32-NEXT: vs4r.v v12, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB121_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 31
+; ZVFHMIN32-NEXT: .LBB121_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -48
+; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 48
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv16bf16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: addi sp, sp, -48
+; ZVFH32-NEXT: .cfi_def_cfa_offset 48
+; ZVFH32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: .cfi_offset ra, -4
+; ZVFH32-NEXT: .cfi_offset s0, -8
+; ZVFH32-NEXT: addi s0, sp, 48
+; ZVFH32-NEXT: .cfi_def_cfa s0, 0
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 3
+; ZVFH32-NEXT: sub sp, sp, a0
+; ZVFH32-NEXT: andi sp, sp, -32
+; ZVFH32-NEXT: addi a0, sp, 32
+; ZVFH32-NEXT: vs4r.v v8, (a0)
+; ZVFH32-NEXT: csrr a1, vlenb
+; ZVFH32-NEXT: slli a2, a1, 2
+; ZVFH32-NEXT: add a2, a0, a2
+; ZVFH32-NEXT: slli a1, a1, 1
+; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: li a3, 31
+; ZVFH32-NEXT: vs4r.v v12, (a2)
+; ZVFH32-NEXT: bltu a1, a3, .LBB121_2
+; ZVFH32-NEXT: # %bb.1:
+; ZVFH32-NEXT: li a1, 31
+; ZVFH32-NEXT: .LBB121_2:
+; ZVFH32-NEXT: slli a1, a1, 1
+; ZVFH32-NEXT: add a0, a0, a1
+; ZVFH32-NEXT: vl4re16.v v8, (a0)
+; ZVFH32-NEXT: addi sp, s0, -48
+; ZVFH32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: addi sp, sp, 48
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv16bf16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: addi sp, sp, -48
+; ZVFH64-NEXT: .cfi_def_cfa_offset 48
+; ZVFH64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: .cfi_offset ra, -8
+; ZVFH64-NEXT: .cfi_offset s0, -16
+; ZVFH64-NEXT: addi s0, sp, 48
+; ZVFH64-NEXT: .cfi_def_cfa s0, 0
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 3
+; ZVFH64-NEXT: sub sp, sp, a0
+; ZVFH64-NEXT: andi sp, sp, -32
+; ZVFH64-NEXT: addi a0, sp, 32
+; ZVFH64-NEXT: vs4r.v v8, (a0)
+; ZVFH64-NEXT: csrr a1, vlenb
+; ZVFH64-NEXT: slli a2, a1, 2
+; ZVFH64-NEXT: add a2, a0, a2
+; ZVFH64-NEXT: slli a1, a1, 1
+; ZVFH64-NEXT: addi a1, a1, -1
+; ZVFH64-NEXT: li a3, 31
+; ZVFH64-NEXT: vs4r.v v12, (a2)
+; ZVFH64-NEXT: bltu a1, a3, .LBB121_2
+; ZVFH64-NEXT: # %bb.1:
+; ZVFH64-NEXT: li a1, 31
+; ZVFH64-NEXT: .LBB121_2:
+; ZVFH64-NEXT: slli a1, a1, 1
+; ZVFH64-NEXT: add a0, a0, a1
+; ZVFH64-NEXT: vl4re16.v v8, (a0)
+; ZVFH64-NEXT: addi sp, s0, -48
+; ZVFH64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: addi sp, sp, 48
+; ZVFH64-NEXT: ret
+ %res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 31)
+ ret <vscale x 16 x bfloat> %res
+}
+
+declare <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i32)
+
+define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_zero(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
+; CHECK-LABEL: splice_nxv32bf16_offset_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ %res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 0)
+ ret <vscale x 32 x bfloat> %res
+}
+
+define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_negone(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
+; ZVFHMIN64-LABEL: splice_nxv32bf16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -80
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 80
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 4
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -64
+; ZVFHMIN64-NEXT: addi a0, sp, 64
+; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 3
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -80
+; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 80
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv32bf16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -80
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 80
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 4
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -64
+; ZVFHMIN32-NEXT: addi a0, sp, 64
+; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 3
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -80
+; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 80
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv32bf16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: addi sp, sp, -80
+; ZVFH32-NEXT: .cfi_def_cfa_offset 80
+; ZVFH32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: .cfi_offset ra, -4
+; ZVFH32-NEXT: .cfi_offset s0, -8
+; ZVFH32-NEXT: addi s0, sp, 80
+; ZVFH32-NEXT: .cfi_def_cfa s0, 0
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 4
+; ZVFH32-NEXT: sub sp, sp, a0
+; ZVFH32-NEXT: andi sp, sp, -64
+; ZVFH32-NEXT: addi a0, sp, 64
+; ZVFH32-NEXT: vs8r.v v8, (a0)
+; ZVFH32-NEXT: csrr a1, vlenb
+; ZVFH32-NEXT: slli a1, a1, 3
+; ZVFH32-NEXT: add a0, a0, a1
+; ZVFH32-NEXT: vs8r.v v16, (a0)
+; ZVFH32-NEXT: addi a0, a0, -2
+; ZVFH32-NEXT: vl8re16.v v8, (a0)
+; ZVFH32-NEXT: addi sp, s0, -80
+; ZVFH32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: addi sp, sp, 80
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv32bf16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: addi sp, sp, -80
+; ZVFH64-NEXT: .cfi_def_cfa_offset 80
+; ZVFH64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: .cfi_offset ra, -8
+; ZVFH64-NEXT: .cfi_offset s0, -16
+; ZVFH64-NEXT: addi s0, sp, 80
+; ZVFH64-NEXT: .cfi_def_cfa s0, 0
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 4
+; ZVFH64-NEXT: sub sp, sp, a0
+; ZVFH64-NEXT: andi sp, sp, -64
+; ZVFH64-NEXT: addi a0, sp, 64
+; ZVFH64-NEXT: vs8r.v v8, (a0)
+; ZVFH64-NEXT: csrr a1, vlenb
+; ZVFH64-NEXT: slli a1, a1, 3
+; ZVFH64-NEXT: add a0, a0, a1
+; ZVFH64-NEXT: vs8r.v v16, (a0)
+; ZVFH64-NEXT: addi a0, a0, -2
+; ZVFH64-NEXT: vl8re16.v v8, (a0)
+; ZVFH64-NEXT: addi sp, s0, -80
+; ZVFH64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: addi sp, sp, 80
+; ZVFH64-NEXT: ret
+ %res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 -1)
+ ret <vscale x 32 x bfloat> %res
+}
+
+define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_min(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
+; ZVFHMIN64-LABEL: splice_nxv32bf16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -80
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 80
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 4
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -64
+; ZVFHMIN64-NEXT: addi a0, sp, 64
+; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 3
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 128
+; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB124_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 128
+; ZVFHMIN64-NEXT: .LBB124_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -80
+; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 80
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv32bf16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -80
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 80
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 4
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -64
+; ZVFHMIN32-NEXT: addi a0, sp, 64
+; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 3
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 128
+; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB124_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 128
+; ZVFHMIN32-NEXT: .LBB124_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -80
+; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 80
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv32bf16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: addi sp, sp, -80
+; ZVFH32-NEXT: .cfi_def_cfa_offset 80
+; ZVFH32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: .cfi_offset ra, -4
+; ZVFH32-NEXT: .cfi_offset s0, -8
+; ZVFH32-NEXT: addi s0, sp, 80
+; ZVFH32-NEXT: .cfi_def_cfa s0, 0
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 4
+; ZVFH32-NEXT: sub sp, sp, a0
+; ZVFH32-NEXT: andi sp, sp, -64
+; ZVFH32-NEXT: addi a0, sp, 64
+; ZVFH32-NEXT: vs8r.v v8, (a0)
+; ZVFH32-NEXT: csrr a1, vlenb
+; ZVFH32-NEXT: slli a1, a1, 3
+; ZVFH32-NEXT: add a0, a0, a1
+; ZVFH32-NEXT: li a2, 128
+; ZVFH32-NEXT: vs8r.v v16, (a0)
+; ZVFH32-NEXT: bltu a1, a2, .LBB124_2
+; ZVFH32-NEXT: # %bb.1:
+; ZVFH32-NEXT: li a1, 128
+; ZVFH32-NEXT: .LBB124_2:
+; ZVFH32-NEXT: sub a0, a0, a1
+; ZVFH32-NEXT: vl8re16.v v8, (a0)
+; ZVFH32-NEXT: addi sp, s0, -80
+; ZVFH32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: addi sp, sp, 80
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv32bf16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: addi sp, sp, -80
+; ZVFH64-NEXT: .cfi_def_cfa_offset 80
+; ZVFH64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: .cfi_offset ra, -8
+; ZVFH64-NEXT: .cfi_offset s0, -16
+; ZVFH64-NEXT: addi s0, sp, 80
+; ZVFH64-NEXT: .cfi_def_cfa s0, 0
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 4
+; ZVFH64-NEXT: sub sp, sp, a0
+; ZVFH64-NEXT: andi sp, sp, -64
+; ZVFH64-NEXT: addi a0, sp, 64
+; ZVFH64-NEXT: vs8r.v v8, (a0)
+; ZVFH64-NEXT: csrr a1, vlenb
+; ZVFH64-NEXT: slli a1, a1, 3
+; ZVFH64-NEXT: add a0, a0, a1
+; ZVFH64-NEXT: li a2, 128
+; ZVFH64-NEXT: vs8r.v v16, (a0)
+; ZVFH64-NEXT: bltu a1, a2, .LBB124_2
+; ZVFH64-NEXT: # %bb.1:
+; ZVFH64-NEXT: li a1, 128
+; ZVFH64-NEXT: .LBB124_2:
+; ZVFH64-NEXT: sub a0, a0, a1
+; ZVFH64-NEXT: vl8re16.v v8, (a0)
+; ZVFH64-NEXT: addi sp, s0, -80
+; ZVFH64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: addi sp, sp, 80
+; ZVFH64-NEXT: ret
+ %res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 -64)
+ ret <vscale x 32 x bfloat> %res
+}
+
+define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_max(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
+; ZVFHMIN64-LABEL: splice_nxv32bf16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -80
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 80
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 4
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -64
+; ZVFHMIN64-NEXT: addi a0, sp, 64
+; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a2, a1, 3
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: slli a1, a1, 2
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 63
+; ZVFHMIN64-NEXT: vs8r.v v16, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB125_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 63
+; ZVFHMIN64-NEXT: .LBB125_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -80
+; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 80
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv32bf16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -80
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 80
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 4
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -64
+; ZVFHMIN32-NEXT: addi a0, sp, 64
+; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a2, a1, 3
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: slli a1, a1, 2
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 63
+; ZVFHMIN32-NEXT: vs8r.v v16, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB125_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 63
+; ZVFHMIN32-NEXT: .LBB125_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -80
+; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 80
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv32bf16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: addi sp, sp, -80
+; ZVFH32-NEXT: .cfi_def_cfa_offset 80
+; ZVFH32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFH32-NEXT: .cfi_offset ra, -4
+; ZVFH32-NEXT: .cfi_offset s0, -8
+; ZVFH32-NEXT: addi s0, sp, 80
+; ZVFH32-NEXT: .cfi_def_cfa s0, 0
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 4
+; ZVFH32-NEXT: sub sp, sp, a0
+; ZVFH32-NEXT: andi sp, sp, -64
+; ZVFH32-NEXT: addi a0, sp, 64
+; ZVFH32-NEXT: vs8r.v v8, (a0)
+; ZVFH32-NEXT: csrr a1, vlenb
+; ZVFH32-NEXT: slli a2, a1, 3
+; ZVFH32-NEXT: add a2, a0, a2
+; ZVFH32-NEXT: slli a1, a1, 2
+; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: li a3, 63
+; ZVFH32-NEXT: vs8r.v v16, (a2)
+; ZVFH32-NEXT: bltu a1, a3, .LBB125_2
+; ZVFH32-NEXT: # %bb.1:
+; ZVFH32-NEXT: li a1, 63
+; ZVFH32-NEXT: .LBB125_2:
+; ZVFH32-NEXT: slli a1, a1, 1
+; ZVFH32-NEXT: add a0, a0, a1
+; ZVFH32-NEXT: vl8re16.v v8, (a0)
+; ZVFH32-NEXT: addi sp, s0, -80
+; ZVFH32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFH32-NEXT: addi sp, sp, 80
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv32bf16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: addi sp, sp, -80
+; ZVFH64-NEXT: .cfi_def_cfa_offset 80
+; ZVFH64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH64-NEXT: .cfi_offset ra, -8
+; ZVFH64-NEXT: .cfi_offset s0, -16
+; ZVFH64-NEXT: addi s0, sp, 80
+; ZVFH64-NEXT: .cfi_def_cfa s0, 0
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 4
+; ZVFH64-NEXT: sub sp, sp, a0
+; ZVFH64-NEXT: andi sp, sp, -64
+; ZVFH64-NEXT: addi a0, sp, 64
+; ZVFH64-NEXT: vs8r.v v8, (a0)
+; ZVFH64-NEXT: csrr a1, vlenb
+; ZVFH64-NEXT: slli a2, a1, 3
+; ZVFH64-NEXT: add a2, a0, a2
+; ZVFH64-NEXT: slli a1, a1, 2
+; ZVFH64-NEXT: addi a1, a1, -1
+; ZVFH64-NEXT: li a3, 63
+; ZVFH64-NEXT: vs8r.v v16, (a2)
+; ZVFH64-NEXT: bltu a1, a3, .LBB125_2
+; ZVFH64-NEXT: # %bb.1:
+; ZVFH64-NEXT: li a1, 63
+; ZVFH64-NEXT: .LBB125_2:
+; ZVFH64-NEXT: slli a1, a1, 1
+; ZVFH64-NEXT: add a0, a0, a1
+; ZVFH64-NEXT: vl8re16.v v8, (a0)
+; ZVFH64-NEXT: addi sp, s0, -80
+; ZVFH64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH64-NEXT: addi sp, sp, 80
+; ZVFH64-NEXT: ret
+ %res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 63)
+ ret <vscale x 32 x bfloat> %res
+}
+
declare <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, i32)
define <vscale x 1 x half> @splice_nxv1f16_offset_zero(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
@@ -1544,45 +2760,229 @@ define <vscale x 1 x half> @splice_nxv1f16_offset_zero(<vscale x 1 x half> %a, <
}
define <vscale x 1 x half> @splice_nxv1f16_offset_negone(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv1f16_offset_negone:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv1f16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: srli a1, a1, 2
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv1f16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: srli a1, a1, 2
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv1f16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 3
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vslideup.vi v8, v9, 1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv1f16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 3
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vslideup.vi v8, v9, 1
+; ZVFH64-NEXT: ret
%res = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, i32 -1)
ret <vscale x 1 x half> %res
}
define <vscale x 1 x half> @splice_nxv1f16_offset_min(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv1f16_offset_min:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -2
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv1f16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: srli a1, a1, 2
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 4
+; ZVFHMIN64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB128_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 4
+; ZVFHMIN64-NEXT: .LBB128_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv1f16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: srli a1, a1, 2
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 4
+; ZVFHMIN32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB128_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 4
+; ZVFHMIN32-NEXT: .LBB128_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv1f16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 3
+; ZVFH32-NEXT: addi a0, a0, -2
+; ZVFH32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v9, 2
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv1f16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 3
+; ZVFH64-NEXT: addi a0, a0, -2
+; ZVFH64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v9, 2
+; ZVFH64-NEXT: ret
%res = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, i32 -2)
ret <vscale x 1 x half> %res
}
define <vscale x 1 x half> @splice_nxv1f16_offset_max(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv1f16_offset_max:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv1f16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: srli a2, a1, 2
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: srli a1, a1, 3
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 1
+; ZVFHMIN64-NEXT: vse16.v v9, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB129_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 1
+; ZVFHMIN64-NEXT: .LBB129_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv1f16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: srli a2, a1, 2
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: srli a1, a1, 3
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 1
+; ZVFHMIN32-NEXT: vse16.v v9, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB129_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 1
+; ZVFHMIN32-NEXT: .LBB129_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv1f16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 3
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
+; ZVFH32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v9, a0
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv1f16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 3
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH64-NEXT: vslidedown.vi v8, v8, 1
+; ZVFH64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v9, a0
+; ZVFH64-NEXT: ret
%res = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, i32 1)
ret <vscale x 1 x half> %res
}
@@ -1598,45 +2998,229 @@ define <vscale x 2 x half> @splice_nxv2f16_offset_zero(<vscale x 2 x half> %a, <
}
define <vscale x 2 x half> @splice_nxv2f16_offset_negone(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv2f16_offset_negone:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv2f16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: srli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv2f16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: srli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv2f16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 2
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vslideup.vi v8, v9, 1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv2f16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 2
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vslideup.vi v8, v9, 1
+; ZVFH64-NEXT: ret
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
ret <vscale x 2 x half> %res
}
define <vscale x 2 x half> @splice_nxv2f16_offset_min(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv2f16_offset_min:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -4
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv2f16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: srli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 8
+; ZVFHMIN64-NEXT: vse16.v v9, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB132_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 8
+; ZVFHMIN64-NEXT: .LBB132_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv2f16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: srli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 8
+; ZVFHMIN32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB132_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 8
+; ZVFHMIN32-NEXT: .LBB132_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv2f16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 2
+; ZVFH32-NEXT: addi a0, a0, -4
+; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v9, 4
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv2f16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 2
+; ZVFH64-NEXT: addi a0, a0, -4
+; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v9, 4
+; ZVFH64-NEXT: ret
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -4)
ret <vscale x 2 x half> %res
}
define <vscale x 2 x half> @splice_nxv2f16_offset_max(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv2f16_offset_max:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -3
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv2f16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: srli a2, a1, 1
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: srli a1, a1, 2
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 3
+; ZVFHMIN64-NEXT: vse16.v v9, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB133_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 3
+; ZVFHMIN64-NEXT: .LBB133_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv2f16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: srli a2, a1, 1
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: srli a1, a1, 2
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 3
+; ZVFHMIN32-NEXT: vse16.v v9, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB133_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 3
+; ZVFHMIN32-NEXT: .LBB133_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vle16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv2f16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 2
+; ZVFH32-NEXT: addi a0, a0, -3
+; ZVFH32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH32-NEXT: vslidedown.vi v8, v8, 3
+; ZVFH32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v9, a0
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv2f16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 2
+; ZVFH64-NEXT: addi a0, a0, -3
+; ZVFH64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH64-NEXT: vslidedown.vi v8, v8, 3
+; ZVFH64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v9, a0
+; ZVFH64-NEXT: ret
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 3)
ret <vscale x 2 x half> %res
}
@@ -1652,45 +3236,229 @@ define <vscale x 4 x half> @splice_nxv4f16_offset_zero(<vscale x 4 x half> %a, <
}
define <vscale x 4 x half> @splice_nxv4f16_offset_negone(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv4f16_offset_negone:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv4f16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 1
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vs1r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vs1r.v v9, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vl1re16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 1
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv4f16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 1
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vs1r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vs1r.v v9, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vl1re16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 1
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv4f16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 1
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vslideup.vi v8, v9, 1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv4f16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 1
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vslideup.vi v8, v9, 1
+; ZVFH64-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
ret <vscale x 4 x half> %res
}
define <vscale x 4 x half> @splice_nxv4f16_offset_min(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv4f16_offset_min:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 8
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv4f16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 1
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vs1r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 16
+; ZVFHMIN64-NEXT: vs1r.v v9, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB136_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 16
+; ZVFHMIN64-NEXT: .LBB136_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vl1re16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 1
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv4f16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 1
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vs1r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 16
+; ZVFHMIN32-NEXT: vs1r.v v9, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB136_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 16
+; ZVFHMIN32-NEXT: .LBB136_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vl1re16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 1
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv4f16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 1
+; ZVFH32-NEXT: addi a0, a0, -8
+; ZVFH32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v9, 8
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv4f16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 1
+; ZVFH64-NEXT: addi a0, a0, -8
+; ZVFH64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v9, 8
+; ZVFH64-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -8)
ret <vscale x 4 x half> %res
}
define <vscale x 4 x half> @splice_nxv4f16_offset_max(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv4f16_offset_max:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -7
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv4f16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 1
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vs1r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: add a2, a0, a1
+; ZVFHMIN64-NEXT: srli a1, a1, 1
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 7
+; ZVFHMIN64-NEXT: vs1r.v v9, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB137_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 7
+; ZVFHMIN64-NEXT: .LBB137_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vl1re16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 1
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv4f16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 1
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vs1r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: add a2, a0, a1
+; ZVFHMIN32-NEXT: srli a1, a1, 1
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 7
+; ZVFHMIN32-NEXT: vs1r.v v9, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB137_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 7
+; ZVFHMIN32-NEXT: .LBB137_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vl1re16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 1
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv4f16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: srli a0, a0, 1
+; ZVFH32-NEXT: addi a0, a0, -7
+; ZVFH32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH32-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v9, a0
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv4f16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: srli a0, a0, 1
+; ZVFH64-NEXT: addi a0, a0, -7
+; ZVFH64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH64-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v9, a0
+; ZVFH64-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 7)
ret <vscale x 4 x half> %res
}
@@ -1706,43 +3474,229 @@ define <vscale x 8 x half> @splice_nxv8f16_offset_zero(<vscale x 8 x half> %a, <
}
define <vscale x 8 x half> @splice_nxv8f16_offset_negone(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv8f16_offset_negone:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv8f16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 2
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vs2r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vs2r.v v10, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vl2re16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 2
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv8f16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 2
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vs2r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vs2r.v v10, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vl2re16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 2
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv8f16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v10, 1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv8f16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v10, 1
+; ZVFH64-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
ret <vscale x 8 x half> %res
}
define <vscale x 8 x half> @splice_nxv8f16_offset_min(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv8f16_offset_min:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -16
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 16
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv8f16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 2
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vs2r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 32
+; ZVFHMIN64-NEXT: vs2r.v v10, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB140_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 32
+; ZVFHMIN64-NEXT: .LBB140_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vl2re16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 2
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv8f16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 2
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vs2r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 32
+; ZVFHMIN32-NEXT: vs2r.v v10, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB140_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 32
+; ZVFHMIN32-NEXT: .LBB140_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vl2re16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 2
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv8f16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: addi a0, a0, -16
+; ZVFH32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v10, 16
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv8f16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: addi a0, a0, -16
+; ZVFH64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v10, 16
+; ZVFH64-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -16)
ret <vscale x 8 x half> %res
}
define <vscale x 8 x half> @splice_nxv8f16_offset_max(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv8f16_offset_max:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -15
-; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 15
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a0
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv8f16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -16
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 2
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN64-NEXT: addi a0, sp, 16
+; ZVFHMIN64-NEXT: vs2r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a2, a1, 1
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 15
+; ZVFHMIN64-NEXT: vs2r.v v10, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB141_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 15
+; ZVFHMIN64-NEXT: .LBB141_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vl2re16.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 2
+; ZVFHMIN64-NEXT: add sp, sp, a0
+; ZVFHMIN64-NEXT: addi sp, sp, 16
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv8f16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -16
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 2
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; ZVFHMIN32-NEXT: addi a0, sp, 16
+; ZVFHMIN32-NEXT: vs2r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a2, a1, 1
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 15
+; ZVFHMIN32-NEXT: vs2r.v v10, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB141_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 15
+; ZVFHMIN32-NEXT: .LBB141_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vl2re16.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 2
+; ZVFHMIN32-NEXT: add sp, sp, a0
+; ZVFHMIN32-NEXT: addi sp, sp, 16
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv8f16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: addi a0, a0, -15
+; ZVFH32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH32-NEXT: vslidedown.vi v8, v8, 15
+; ZVFH32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v10, a0
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv8f16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: addi a0, a0, -15
+; ZVFH64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH64-NEXT: vslidedown.vi v8, v8, 15
+; ZVFH64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v10, a0
+; ZVFH64-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 15)
ret <vscale x 8 x half> %res
}
@@ -1758,47 +3712,275 @@ define <vscale x 16 x half> @splice_nxv16f16_offset_zero(<vscale x 16 x half> %a
}
define <vscale x 16 x half> @splice_nxv16f16_offset_negone(<vscale x 16 x half> %a, <vscale x 16 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv16f16_offset_negone:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv16f16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -48
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 48
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 3
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -32
+; ZVFHMIN64-NEXT: addi a0, sp, 32
+; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 2
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -48
+; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 48
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv16f16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -48
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 48
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 3
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -32
+; ZVFHMIN32-NEXT: addi a0, sp, 32
+; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 2
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -48
+; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 48
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv16f16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 1
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetivli zero, 1, e16, m4, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v12, 1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv16f16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 1
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetivli zero, 1, e16, m4, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v12, 1
+; ZVFH64-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b, i32 -1)
ret <vscale x 16 x half> %res
}
define <vscale x 16 x half> @splice_nxv16f16_offset_min(<vscale x 16 x half> %a, <vscale x 16 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv16f16_offset_min:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -32
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v12, a1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv16f16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -48
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 48
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 3
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -32
+; ZVFHMIN64-NEXT: addi a0, sp, 32
+; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 2
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 64
+; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB144_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 64
+; ZVFHMIN64-NEXT: .LBB144_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -48
+; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 48
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv16f16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -48
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 48
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 3
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -32
+; ZVFHMIN32-NEXT: addi a0, sp, 32
+; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 2
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 64
+; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB144_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 64
+; ZVFHMIN32-NEXT: .LBB144_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -48
+; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 48
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv16f16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 1
+; ZVFH32-NEXT: addi a0, a0, -32
+; ZVFH32-NEXT: li a1, 32
+; ZVFH32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v12, a1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv16f16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 1
+; ZVFH64-NEXT: addi a0, a0, -32
+; ZVFH64-NEXT: li a1, 32
+; ZVFH64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v12, a1
+; ZVFH64-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b, i32 -32)
ret <vscale x 16 x half> %res
}
define <vscale x 16 x half> @splice_nxv16f16_offset_max(<vscale x 16 x half> %a, <vscale x 16 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv16f16_offset_max:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -31
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 31
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v12, a0
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv16f16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -48
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 48
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 3
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -32
+; ZVFHMIN64-NEXT: addi a0, sp, 32
+; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a2, a1, 2
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 31
+; ZVFHMIN64-NEXT: vs4r.v v12, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB145_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 31
+; ZVFHMIN64-NEXT: .LBB145_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -48
+; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 48
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv16f16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -48
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
+; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 48
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 3
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -32
+; ZVFHMIN32-NEXT: addi a0, sp, 32
+; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a2, a1, 2
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 31
+; ZVFHMIN32-NEXT: vs4r.v v12, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB145_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 31
+; ZVFHMIN32-NEXT: .LBB145_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -48
+; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 48
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv16f16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 1
+; ZVFH32-NEXT: addi a0, a0, -31
+; ZVFH32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH32-NEXT: vslidedown.vi v8, v8, 31
+; ZVFH32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v12, a0
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv16f16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 1
+; ZVFH64-NEXT: addi a0, a0, -31
+; ZVFH64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH64-NEXT: vslidedown.vi v8, v8, 31
+; ZVFH64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v12, a0
+; ZVFH64-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b, i32 31)
ret <vscale x 16 x half> %res
}
@@ -1814,48 +3996,277 @@ define <vscale x 32 x half> @splice_nxv32f16_offset_zero(<vscale x 32 x half> %a
}
define <vscale x 32 x half> @splice_nxv32f16_offset_negone(<vscale x 32 x half> %a, <vscale x 32 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv32f16_offset_negone:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v16, 1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv32f16_offset_negone:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -80
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 80
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 4
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -64
+; ZVFHMIN64-NEXT: addi a0, sp, 64
+; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 3
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN64-NEXT: addi a0, a0, -2
+; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -80
+; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 80
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv32f16_offset_negone:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -80
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 80
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 4
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -64
+; ZVFHMIN32-NEXT: addi a0, sp, 64
+; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 3
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN32-NEXT: addi a0, a0, -2
+; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -80
+; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 80
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv32f16_offset_negone:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 2
+; ZVFH32-NEXT: addi a0, a0, -1
+; ZVFH32-NEXT: vsetivli zero, 1, e16, m8, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH32-NEXT: vslideup.vi v8, v16, 1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv32f16_offset_negone:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 2
+; ZVFH64-NEXT: addi a0, a0, -1
+; ZVFH64-NEXT: vsetivli zero, 1, e16, m8, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH64-NEXT: vslideup.vi v8, v16, 1
+; ZVFH64-NEXT: ret
%res = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, i32 -1)
ret <vscale x 32 x half> %res
}
define <vscale x 32 x half> @splice_nxv32f16_offset_min(<vscale x 32 x half> %a, <vscale x 32 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv32f16_offset_min:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -64
-; CHECK-NEXT: li a1, 64
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a1
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv32f16_offset_min:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -80
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 80
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 4
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -64
+; ZVFHMIN64-NEXT: addi a0, sp, 64
+; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a1, a1, 3
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: li a2, 128
+; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN64-NEXT: bltu a1, a2, .LBB148_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 128
+; ZVFHMIN64-NEXT: .LBB148_2:
+; ZVFHMIN64-NEXT: sub a0, a0, a1
+; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -80
+; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 80
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv32f16_offset_min:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -80
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 80
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 4
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -64
+; ZVFHMIN32-NEXT: addi a0, sp, 64
+; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a1, a1, 3
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: li a2, 128
+; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
+; ZVFHMIN32-NEXT: bltu a1, a2, .LBB148_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 128
+; ZVFHMIN32-NEXT: .LBB148_2:
+; ZVFHMIN32-NEXT: sub a0, a0, a1
+; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -80
+; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 80
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv32f16_offset_min:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 2
+; ZVFH32-NEXT: addi a0, a0, -64
+; ZVFH32-NEXT: li a1, 64
+; ZVFH32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v16, a1
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv32f16_offset_min:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 2
+; ZVFH64-NEXT: addi a0, a0, -64
+; ZVFH64-NEXT: li a1, 64
+; ZVFH64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
+; ZVFH64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v16, a1
+; ZVFH64-NEXT: ret
%res = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, i32 -64)
ret <vscale x 32 x half> %res
}
define <vscale x 32 x half> @splice_nxv32f16_offset_max(<vscale x 32 x half> %a, <vscale x 32 x half> %b) #0 {
-; CHECK-LABEL: splice_nxv32f16_offset_max:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -63
-; CHECK-NEXT: li a1, 63
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a0
-; CHECK-NEXT: ret
+; ZVFHMIN64-LABEL: splice_nxv32f16_offset_max:
+; ZVFHMIN64: # %bb.0:
+; ZVFHMIN64-NEXT: addi sp, sp, -80
+; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; ZVFHMIN64-NEXT: .cfi_offset ra, -4
+; ZVFHMIN64-NEXT: .cfi_offset s0, -8
+; ZVFHMIN64-NEXT: addi s0, sp, 80
+; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN64-NEXT: csrr a0, vlenb
+; ZVFHMIN64-NEXT: slli a0, a0, 4
+; ZVFHMIN64-NEXT: sub sp, sp, a0
+; ZVFHMIN64-NEXT: andi sp, sp, -64
+; ZVFHMIN64-NEXT: addi a0, sp, 64
+; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN64-NEXT: csrr a1, vlenb
+; ZVFHMIN64-NEXT: slli a2, a1, 3
+; ZVFHMIN64-NEXT: add a2, a0, a2
+; ZVFHMIN64-NEXT: slli a1, a1, 2
+; ZVFHMIN64-NEXT: addi a1, a1, -1
+; ZVFHMIN64-NEXT: li a3, 63
+; ZVFHMIN64-NEXT: vs8r.v v16, (a2)
+; ZVFHMIN64-NEXT: bltu a1, a3, .LBB149_2
+; ZVFHMIN64-NEXT: # %bb.1:
+; ZVFHMIN64-NEXT: li a1, 63
+; ZVFHMIN64-NEXT: .LBB149_2:
+; ZVFHMIN64-NEXT: slli a1, a1, 1
+; ZVFHMIN64-NEXT: add a0, a0, a1
+; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN64-NEXT: addi sp, s0, -80
+; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; ZVFHMIN64-NEXT: addi sp, sp, 80
+; ZVFHMIN64-NEXT: ret
+;
+; ZVFHMIN32-LABEL: splice_nxv32f16_offset_max:
+; ZVFHMIN32: # %bb.0:
+; ZVFHMIN32-NEXT: addi sp, sp, -80
+; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN32-NEXT: .cfi_offset ra, -8
+; ZVFHMIN32-NEXT: .cfi_offset s0, -16
+; ZVFHMIN32-NEXT: addi s0, sp, 80
+; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
+; ZVFHMIN32-NEXT: csrr a0, vlenb
+; ZVFHMIN32-NEXT: slli a0, a0, 4
+; ZVFHMIN32-NEXT: sub sp, sp, a0
+; ZVFHMIN32-NEXT: andi sp, sp, -64
+; ZVFHMIN32-NEXT: addi a0, sp, 64
+; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
+; ZVFHMIN32-NEXT: csrr a1, vlenb
+; ZVFHMIN32-NEXT: slli a2, a1, 3
+; ZVFHMIN32-NEXT: add a2, a0, a2
+; ZVFHMIN32-NEXT: slli a1, a1, 2
+; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: li a3, 63
+; ZVFHMIN32-NEXT: vs8r.v v16, (a2)
+; ZVFHMIN32-NEXT: bltu a1, a3, .LBB149_2
+; ZVFHMIN32-NEXT: # %bb.1:
+; ZVFHMIN32-NEXT: li a1, 63
+; ZVFHMIN32-NEXT: .LBB149_2:
+; ZVFHMIN32-NEXT: slli a1, a1, 1
+; ZVFHMIN32-NEXT: add a0, a0, a1
+; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN32-NEXT: addi sp, s0, -80
+; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN32-NEXT: addi sp, sp, 80
+; ZVFHMIN32-NEXT: ret
+;
+; ZVFH32-LABEL: splice_nxv32f16_offset_max:
+; ZVFH32: # %bb.0:
+; ZVFH32-NEXT: csrr a0, vlenb
+; ZVFH32-NEXT: slli a0, a0, 2
+; ZVFH32-NEXT: addi a0, a0, -63
+; ZVFH32-NEXT: li a1, 63
+; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH32-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH32-NEXT: vslideup.vx v8, v16, a0
+; ZVFH32-NEXT: ret
+;
+; ZVFH64-LABEL: splice_nxv32f16_offset_max:
+; ZVFH64: # %bb.0:
+; ZVFH64-NEXT: csrr a0, vlenb
+; ZVFH64-NEXT: slli a0, a0, 2
+; ZVFH64-NEXT: addi a0, a0, -63
+; ZVFH64-NEXT: li a1, 63
+; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH64-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH64-NEXT: vslideup.vx v8, v16, a0
+; ZVFH64-NEXT: ret
%res = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, i32 63)
ret <vscale x 32 x half> %res
}
>From 118f8ba50b2ef493853badc1fbcdea1ea9fb5faa Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 16 Oct 2024 17:36:03 +0100
Subject: [PATCH 2/3] [RISCV] Lower vector_splice on zvfhmin/zvfbfmin
As with other permutation ops, we can just reuse the existing lowering.
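To illustrate, for the nxv1bf16 offset -1 case the splice

    %res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 -1)

is now selected to a slidedown/slideup pair instead of being spilled through the stack. A sketch of the expected codegen, annotated from the updated checks in vector-splice.ll below (register assignments as in the autogenerated output):

    csrr a0, vlenb                    # a0 = VLEN in bytes
    srli a0, a0, 3                    # a0 = element count of <vscale x 1 x bfloat>
    addi a0, a0, -1                   # slide amount = count - 1
    vsetvli a1, zero, e16, mf4, ta, ma
    vslidedown.vx v8, v8, a0          # move the last element of %a into lane 0
    vslideup.vi v8, v9, 1             # insert %b starting at lane 1
    ret

That is, %a is slid down so its last element lands in lane 0, then %b is slid up by one to fill the remaining lanes.
---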
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 2694 ++----------------
2 files changed, 307 insertions(+), 2389 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bf333b7b790167..076ed173f64e2e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1076,7 +1076,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
- ISD::VECTOR_REVERSE},
+ ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE},
VT, Custom);
MVT EltVT = VT.getVectorElementType();
if (isTypeLegal(EltVT))
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
index 3f84f4549ce814..c9cb6dc6397c3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN64
-; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN32
-; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH32
-; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFH64
+; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
; Tests assume VLEN=128 or vscale_range_min=2.
@@ -1548,23 +1548,12 @@ define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_zero(<vscale x 1 x bfloat>
define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_negone(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv1bf16_offset_negone:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, -2
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 -1)
ret <vscale x 1 x bfloat> %res
@@ -1573,28 +1562,13 @@ define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_negone(<vscale x 1 x bfloat
define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_min(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv1bf16_offset_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: li a2, 4
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: bltu a1, a2, .LBB104_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 4
-; CHECK-NEXT: .LBB104_2:
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -2
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: ret
%res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 -2)
ret <vscale x 1 x bfloat> %res
@@ -1603,31 +1577,13 @@ define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_min(<vscale x 1 x bfloat> %
define <vscale x 1 x bfloat> @splice_nxv1bf16_offset_max(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv1bf16_offset_max:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: li a3, 1
-; CHECK-NEXT: vse16.v v9, (a2)
-; CHECK-NEXT: bltu a1, a3, .LBB105_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: .LBB105_2:
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%res = call <vscale x 1 x bfloat> @llvm.vector.splice.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, i32 1)
ret <vscale x 1 x bfloat> %res
@@ -1646,23 +1602,12 @@ define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_zero(<vscale x 2 x bfloat>
define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_negone(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_offset_negone:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, -2
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -1)
ret <vscale x 2 x bfloat> %res
@@ -1671,28 +1616,13 @@ define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_negone(<vscale x 2 x bfloat
define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_min(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_offset_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: li a2, 8
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: bltu a1, a2, .LBB108_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 8
-; CHECK-NEXT: .LBB108_2:
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -4
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 -4)
ret <vscale x 2 x bfloat> %res
@@ -1701,31 +1631,13 @@ define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_min(<vscale x 2 x bfloat> %
define <vscale x 2 x bfloat> @splice_nxv2bf16_offset_max(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv2bf16_offset_max:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -3
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: li a3, 3
-; CHECK-NEXT: vse16.v v9, (a2)
-; CHECK-NEXT: bltu a1, a3, .LBB109_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 3
-; CHECK-NEXT: .LBB109_2:
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%res = call <vscale x 2 x bfloat> @llvm.vector.splice.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b, i32 3)
ret <vscale x 2 x bfloat> %res
@@ -1744,23 +1656,12 @@ define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_zero(<vscale x 4 x bfloat>
define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_negone(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_offset_negone:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs1r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vs1r.v v9, (a0)
-; CHECK-NEXT: addi a0, a0, -2
-; CHECK-NEXT: vl1re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
%res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -1)
ret <vscale x 4 x bfloat> %res
@@ -1769,28 +1670,13 @@ define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_negone(<vscale x 4 x bfloat
define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_min(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_offset_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs1r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vs1r.v v9, (a0)
-; CHECK-NEXT: bltu a1, a2, .LBB112_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB112_2:
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: vl1re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 8
; CHECK-NEXT: ret
%res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 -8)
ret <vscale x 4 x bfloat> %res
@@ -1799,31 +1685,13 @@ define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_min(<vscale x 4 x bfloat> %
define <vscale x 4 x bfloat> @splice_nxv4bf16_offset_max(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv4bf16_offset_max:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs1r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: add a2, a0, a1
-; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: li a3, 7
-; CHECK-NEXT: vs1r.v v9, (a2)
-; CHECK-NEXT: bltu a1, a3, .LBB113_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 7
-; CHECK-NEXT: .LBB113_2:
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl1re16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -7
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 7
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%res = call <vscale x 4 x bfloat> @llvm.vector.splice.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b, i32 7)
ret <vscale x 4 x bfloat> %res
@@ -1842,24 +1710,12 @@ define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_zero(<vscale x 8 x bfloat>
define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_negone(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv8bf16_offset_negone:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs2r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vs2r.v v10, (a0)
-; CHECK-NEXT: addi a0, a0, -2
-; CHECK-NEXT: vl2re16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 -1)
ret <vscale x 8 x bfloat> %res
@@ -1868,29 +1724,12 @@ define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_negone(<vscale x 8 x bfloat
define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_min(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv8bf16_offset_min:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs2r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vs2r.v v10, (a0)
-; CHECK-NEXT: bltu a1, a2, .LBB116_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: .LBB116_2:
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: vl2re16.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: addi a0, a0, -16
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 16
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 -16)
ret <vscale x 8 x bfloat> %res
@@ -1899,31 +1738,12 @@ define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_min(<vscale x 8 x bfloat> %
define <vscale x 8 x bfloat> @splice_nxv8bf16_offset_max(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_nxv8bf16_offset_max:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs2r.v v8, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: add a2, a0, a2
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: li a3, 15
-; CHECK-NEXT: vs2r.v v10, (a2)
-; CHECK-NEXT: bltu a1, a3, .LBB117_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a1, 15
-; CHECK-NEXT: .LBB117_2:
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: addi a0, a0, -15
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
%res = call <vscale x 8 x bfloat> @llvm.vector.splice.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 15)
ret <vscale x 8 x bfloat> %res
@@ -1940,401 +1760,47 @@ define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_zero(<vscale x 16 x bfloa
}
define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_negone(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv16bf16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -48
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 48
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 3
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -32
-; ZVFHMIN64-NEXT: addi a0, sp, 32
-; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 2
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -48
-; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 48
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv16bf16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -48
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 48
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 3
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -32
-; ZVFHMIN32-NEXT: addi a0, sp, 32
-; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 2
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -48
-; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 48
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv16bf16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: addi sp, sp, -48
-; ZVFH32-NEXT: .cfi_def_cfa_offset 48
-; ZVFH32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: .cfi_offset ra, -4
-; ZVFH32-NEXT: .cfi_offset s0, -8
-; ZVFH32-NEXT: addi s0, sp, 48
-; ZVFH32-NEXT: .cfi_def_cfa s0, 0
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 3
-; ZVFH32-NEXT: sub sp, sp, a0
-; ZVFH32-NEXT: andi sp, sp, -32
-; ZVFH32-NEXT: addi a0, sp, 32
-; ZVFH32-NEXT: vs4r.v v8, (a0)
-; ZVFH32-NEXT: csrr a1, vlenb
-; ZVFH32-NEXT: slli a1, a1, 2
-; ZVFH32-NEXT: add a0, a0, a1
-; ZVFH32-NEXT: vs4r.v v12, (a0)
-; ZVFH32-NEXT: addi a0, a0, -2
-; ZVFH32-NEXT: vl4re16.v v8, (a0)
-; ZVFH32-NEXT: addi sp, s0, -48
-; ZVFH32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: addi sp, sp, 48
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv16bf16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: addi sp, sp, -48
-; ZVFH64-NEXT: .cfi_def_cfa_offset 48
-; ZVFH64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: .cfi_offset ra, -8
-; ZVFH64-NEXT: .cfi_offset s0, -16
-; ZVFH64-NEXT: addi s0, sp, 48
-; ZVFH64-NEXT: .cfi_def_cfa s0, 0
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 3
-; ZVFH64-NEXT: sub sp, sp, a0
-; ZVFH64-NEXT: andi sp, sp, -32
-; ZVFH64-NEXT: addi a0, sp, 32
-; ZVFH64-NEXT: vs4r.v v8, (a0)
-; ZVFH64-NEXT: csrr a1, vlenb
-; ZVFH64-NEXT: slli a1, a1, 2
-; ZVFH64-NEXT: add a0, a0, a1
-; ZVFH64-NEXT: vs4r.v v12, (a0)
-; ZVFH64-NEXT: addi a0, a0, -2
-; ZVFH64-NEXT: vl4re16.v v8, (a0)
-; ZVFH64-NEXT: addi sp, s0, -48
-; ZVFH64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: addi sp, sp, 48
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv16bf16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v12, 1
+; CHECK-NEXT: ret
%res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 -1)
ret <vscale x 16 x bfloat> %res
}
define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_min(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv16bf16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -48
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 48
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 3
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -32
-; ZVFHMIN64-NEXT: addi a0, sp, 32
-; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 2
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 64
-; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB120_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 64
-; ZVFHMIN64-NEXT: .LBB120_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -48
-; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 48
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv16bf16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -48
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 48
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 3
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -32
-; ZVFHMIN32-NEXT: addi a0, sp, 32
-; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 2
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 64
-; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB120_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 64
-; ZVFHMIN32-NEXT: .LBB120_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -48
-; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 48
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv16bf16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: addi sp, sp, -48
-; ZVFH32-NEXT: .cfi_def_cfa_offset 48
-; ZVFH32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: .cfi_offset ra, -4
-; ZVFH32-NEXT: .cfi_offset s0, -8
-; ZVFH32-NEXT: addi s0, sp, 48
-; ZVFH32-NEXT: .cfi_def_cfa s0, 0
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 3
-; ZVFH32-NEXT: sub sp, sp, a0
-; ZVFH32-NEXT: andi sp, sp, -32
-; ZVFH32-NEXT: addi a0, sp, 32
-; ZVFH32-NEXT: vs4r.v v8, (a0)
-; ZVFH32-NEXT: csrr a1, vlenb
-; ZVFH32-NEXT: slli a1, a1, 2
-; ZVFH32-NEXT: add a0, a0, a1
-; ZVFH32-NEXT: li a2, 64
-; ZVFH32-NEXT: vs4r.v v12, (a0)
-; ZVFH32-NEXT: bltu a1, a2, .LBB120_2
-; ZVFH32-NEXT: # %bb.1:
-; ZVFH32-NEXT: li a1, 64
-; ZVFH32-NEXT: .LBB120_2:
-; ZVFH32-NEXT: sub a0, a0, a1
-; ZVFH32-NEXT: vl4re16.v v8, (a0)
-; ZVFH32-NEXT: addi sp, s0, -48
-; ZVFH32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: addi sp, sp, 48
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv16bf16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: addi sp, sp, -48
-; ZVFH64-NEXT: .cfi_def_cfa_offset 48
-; ZVFH64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: .cfi_offset ra, -8
-; ZVFH64-NEXT: .cfi_offset s0, -16
-; ZVFH64-NEXT: addi s0, sp, 48
-; ZVFH64-NEXT: .cfi_def_cfa s0, 0
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 3
-; ZVFH64-NEXT: sub sp, sp, a0
-; ZVFH64-NEXT: andi sp, sp, -32
-; ZVFH64-NEXT: addi a0, sp, 32
-; ZVFH64-NEXT: vs4r.v v8, (a0)
-; ZVFH64-NEXT: csrr a1, vlenb
-; ZVFH64-NEXT: slli a1, a1, 2
-; ZVFH64-NEXT: add a0, a0, a1
-; ZVFH64-NEXT: li a2, 64
-; ZVFH64-NEXT: vs4r.v v12, (a0)
-; ZVFH64-NEXT: bltu a1, a2, .LBB120_2
-; ZVFH64-NEXT: # %bb.1:
-; ZVFH64-NEXT: li a1, 64
-; ZVFH64-NEXT: .LBB120_2:
-; ZVFH64-NEXT: sub a0, a0, a1
-; ZVFH64-NEXT: vl4re16.v v8, (a0)
-; ZVFH64-NEXT: addi sp, s0, -48
-; ZVFH64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: addi sp, sp, 48
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv16bf16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -32
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v12, a1
+; CHECK-NEXT: ret
%res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 -32)
ret <vscale x 16 x bfloat> %res
}
define <vscale x 16 x bfloat> @splice_nxv16bf16_offset_max(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv16bf16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -48
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 48
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 3
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -32
-; ZVFHMIN64-NEXT: addi a0, sp, 32
-; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a2, a1, 2
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 31
-; ZVFHMIN64-NEXT: vs4r.v v12, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB121_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 31
-; ZVFHMIN64-NEXT: .LBB121_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -48
-; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 48
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv16bf16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -48
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 48
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 3
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -32
-; ZVFHMIN32-NEXT: addi a0, sp, 32
-; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a2, a1, 2
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 31
-; ZVFHMIN32-NEXT: vs4r.v v12, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB121_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 31
-; ZVFHMIN32-NEXT: .LBB121_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -48
-; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 48
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv16bf16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: addi sp, sp, -48
-; ZVFH32-NEXT: .cfi_def_cfa_offset 48
-; ZVFH32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: .cfi_offset ra, -4
-; ZVFH32-NEXT: .cfi_offset s0, -8
-; ZVFH32-NEXT: addi s0, sp, 48
-; ZVFH32-NEXT: .cfi_def_cfa s0, 0
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 3
-; ZVFH32-NEXT: sub sp, sp, a0
-; ZVFH32-NEXT: andi sp, sp, -32
-; ZVFH32-NEXT: addi a0, sp, 32
-; ZVFH32-NEXT: vs4r.v v8, (a0)
-; ZVFH32-NEXT: csrr a1, vlenb
-; ZVFH32-NEXT: slli a2, a1, 2
-; ZVFH32-NEXT: add a2, a0, a2
-; ZVFH32-NEXT: slli a1, a1, 1
-; ZVFH32-NEXT: addi a1, a1, -1
-; ZVFH32-NEXT: li a3, 31
-; ZVFH32-NEXT: vs4r.v v12, (a2)
-; ZVFH32-NEXT: bltu a1, a3, .LBB121_2
-; ZVFH32-NEXT: # %bb.1:
-; ZVFH32-NEXT: li a1, 31
-; ZVFH32-NEXT: .LBB121_2:
-; ZVFH32-NEXT: slli a1, a1, 1
-; ZVFH32-NEXT: add a0, a0, a1
-; ZVFH32-NEXT: vl4re16.v v8, (a0)
-; ZVFH32-NEXT: addi sp, s0, -48
-; ZVFH32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: addi sp, sp, 48
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv16bf16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: addi sp, sp, -48
-; ZVFH64-NEXT: .cfi_def_cfa_offset 48
-; ZVFH64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: .cfi_offset ra, -8
-; ZVFH64-NEXT: .cfi_offset s0, -16
-; ZVFH64-NEXT: addi s0, sp, 48
-; ZVFH64-NEXT: .cfi_def_cfa s0, 0
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 3
-; ZVFH64-NEXT: sub sp, sp, a0
-; ZVFH64-NEXT: andi sp, sp, -32
-; ZVFH64-NEXT: addi a0, sp, 32
-; ZVFH64-NEXT: vs4r.v v8, (a0)
-; ZVFH64-NEXT: csrr a1, vlenb
-; ZVFH64-NEXT: slli a2, a1, 2
-; ZVFH64-NEXT: add a2, a0, a2
-; ZVFH64-NEXT: slli a1, a1, 1
-; ZVFH64-NEXT: addi a1, a1, -1
-; ZVFH64-NEXT: li a3, 31
-; ZVFH64-NEXT: vs4r.v v12, (a2)
-; ZVFH64-NEXT: bltu a1, a3, .LBB121_2
-; ZVFH64-NEXT: # %bb.1:
-; ZVFH64-NEXT: li a1, 31
-; ZVFH64-NEXT: .LBB121_2:
-; ZVFH64-NEXT: slli a1, a1, 1
-; ZVFH64-NEXT: add a0, a0, a1
-; ZVFH64-NEXT: vl4re16.v v8, (a0)
-; ZVFH64-NEXT: addi sp, s0, -48
-; ZVFH64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: addi sp, sp, 48
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv16bf16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -31
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 31
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v12, a0
+; CHECK-NEXT: ret
%res = call <vscale x 16 x bfloat> @llvm.vector.splice.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b, i32 31)
ret <vscale x 16 x bfloat> %res
}
@@ -2350,401 +1816,48 @@ define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_zero(<vscale x 32 x bfloa
}
define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_negone(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv32bf16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -80
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 80
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 4
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -64
-; ZVFHMIN64-NEXT: addi a0, sp, 64
-; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 3
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -80
-; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 80
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv32bf16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -80
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 80
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 4
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -64
-; ZVFHMIN32-NEXT: addi a0, sp, 64
-; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 3
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -80
-; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 80
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv32bf16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: addi sp, sp, -80
-; ZVFH32-NEXT: .cfi_def_cfa_offset 80
-; ZVFH32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: .cfi_offset ra, -4
-; ZVFH32-NEXT: .cfi_offset s0, -8
-; ZVFH32-NEXT: addi s0, sp, 80
-; ZVFH32-NEXT: .cfi_def_cfa s0, 0
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 4
-; ZVFH32-NEXT: sub sp, sp, a0
-; ZVFH32-NEXT: andi sp, sp, -64
-; ZVFH32-NEXT: addi a0, sp, 64
-; ZVFH32-NEXT: vs8r.v v8, (a0)
-; ZVFH32-NEXT: csrr a1, vlenb
-; ZVFH32-NEXT: slli a1, a1, 3
-; ZVFH32-NEXT: add a0, a0, a1
-; ZVFH32-NEXT: vs8r.v v16, (a0)
-; ZVFH32-NEXT: addi a0, a0, -2
-; ZVFH32-NEXT: vl8re16.v v8, (a0)
-; ZVFH32-NEXT: addi sp, s0, -80
-; ZVFH32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: addi sp, sp, 80
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv32bf16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: addi sp, sp, -80
-; ZVFH64-NEXT: .cfi_def_cfa_offset 80
-; ZVFH64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: .cfi_offset ra, -8
-; ZVFH64-NEXT: .cfi_offset s0, -16
-; ZVFH64-NEXT: addi s0, sp, 80
-; ZVFH64-NEXT: .cfi_def_cfa s0, 0
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 4
-; ZVFH64-NEXT: sub sp, sp, a0
-; ZVFH64-NEXT: andi sp, sp, -64
-; ZVFH64-NEXT: addi a0, sp, 64
-; ZVFH64-NEXT: vs8r.v v8, (a0)
-; ZVFH64-NEXT: csrr a1, vlenb
-; ZVFH64-NEXT: slli a1, a1, 3
-; ZVFH64-NEXT: add a0, a0, a1
-; ZVFH64-NEXT: vs8r.v v16, (a0)
-; ZVFH64-NEXT: addi a0, a0, -2
-; ZVFH64-NEXT: vl8re16.v v8, (a0)
-; ZVFH64-NEXT: addi sp, s0, -80
-; ZVFH64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: addi sp, sp, 80
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv32bf16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 1
+; CHECK-NEXT: ret
%res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 -1)
ret <vscale x 32 x bfloat> %res
}
define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_min(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv32bf16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -80
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 80
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 4
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -64
-; ZVFHMIN64-NEXT: addi a0, sp, 64
-; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 3
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 128
-; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB124_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 128
-; ZVFHMIN64-NEXT: .LBB124_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -80
-; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 80
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv32bf16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -80
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 80
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 4
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -64
-; ZVFHMIN32-NEXT: addi a0, sp, 64
-; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 3
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 128
-; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB124_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 128
-; ZVFHMIN32-NEXT: .LBB124_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -80
-; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 80
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv32bf16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: addi sp, sp, -80
-; ZVFH32-NEXT: .cfi_def_cfa_offset 80
-; ZVFH32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: .cfi_offset ra, -4
-; ZVFH32-NEXT: .cfi_offset s0, -8
-; ZVFH32-NEXT: addi s0, sp, 80
-; ZVFH32-NEXT: .cfi_def_cfa s0, 0
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 4
-; ZVFH32-NEXT: sub sp, sp, a0
-; ZVFH32-NEXT: andi sp, sp, -64
-; ZVFH32-NEXT: addi a0, sp, 64
-; ZVFH32-NEXT: vs8r.v v8, (a0)
-; ZVFH32-NEXT: csrr a1, vlenb
-; ZVFH32-NEXT: slli a1, a1, 3
-; ZVFH32-NEXT: add a0, a0, a1
-; ZVFH32-NEXT: li a2, 128
-; ZVFH32-NEXT: vs8r.v v16, (a0)
-; ZVFH32-NEXT: bltu a1, a2, .LBB124_2
-; ZVFH32-NEXT: # %bb.1:
-; ZVFH32-NEXT: li a1, 128
-; ZVFH32-NEXT: .LBB124_2:
-; ZVFH32-NEXT: sub a0, a0, a1
-; ZVFH32-NEXT: vl8re16.v v8, (a0)
-; ZVFH32-NEXT: addi sp, s0, -80
-; ZVFH32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: addi sp, sp, 80
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv32bf16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: addi sp, sp, -80
-; ZVFH64-NEXT: .cfi_def_cfa_offset 80
-; ZVFH64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: .cfi_offset ra, -8
-; ZVFH64-NEXT: .cfi_offset s0, -16
-; ZVFH64-NEXT: addi s0, sp, 80
-; ZVFH64-NEXT: .cfi_def_cfa s0, 0
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 4
-; ZVFH64-NEXT: sub sp, sp, a0
-; ZVFH64-NEXT: andi sp, sp, -64
-; ZVFH64-NEXT: addi a0, sp, 64
-; ZVFH64-NEXT: vs8r.v v8, (a0)
-; ZVFH64-NEXT: csrr a1, vlenb
-; ZVFH64-NEXT: slli a1, a1, 3
-; ZVFH64-NEXT: add a0, a0, a1
-; ZVFH64-NEXT: li a2, 128
-; ZVFH64-NEXT: vs8r.v v16, (a0)
-; ZVFH64-NEXT: bltu a1, a2, .LBB124_2
-; ZVFH64-NEXT: # %bb.1:
-; ZVFH64-NEXT: li a1, 128
-; ZVFH64-NEXT: .LBB124_2:
-; ZVFH64-NEXT: sub a0, a0, a1
-; ZVFH64-NEXT: vl8re16.v v8, (a0)
-; ZVFH64-NEXT: addi sp, s0, -80
-; ZVFH64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: addi sp, sp, 80
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv32bf16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -64
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: ret
%res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 -64)
ret <vscale x 32 x bfloat> %res
}
define <vscale x 32 x bfloat> @splice_nxv32bf16_offset_max(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv32bf16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -80
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 80
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 4
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -64
-; ZVFHMIN64-NEXT: addi a0, sp, 64
-; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a2, a1, 3
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: slli a1, a1, 2
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 63
-; ZVFHMIN64-NEXT: vs8r.v v16, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB125_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 63
-; ZVFHMIN64-NEXT: .LBB125_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -80
-; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 80
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv32bf16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -80
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 80
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 4
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -64
-; ZVFHMIN32-NEXT: addi a0, sp, 64
-; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a2, a1, 3
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: slli a1, a1, 2
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 63
-; ZVFHMIN32-NEXT: vs8r.v v16, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB125_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 63
-; ZVFHMIN32-NEXT: .LBB125_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -80
-; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 80
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv32bf16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: addi sp, sp, -80
-; ZVFH32-NEXT: .cfi_def_cfa_offset 80
-; ZVFH32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFH32-NEXT: .cfi_offset ra, -4
-; ZVFH32-NEXT: .cfi_offset s0, -8
-; ZVFH32-NEXT: addi s0, sp, 80
-; ZVFH32-NEXT: .cfi_def_cfa s0, 0
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 4
-; ZVFH32-NEXT: sub sp, sp, a0
-; ZVFH32-NEXT: andi sp, sp, -64
-; ZVFH32-NEXT: addi a0, sp, 64
-; ZVFH32-NEXT: vs8r.v v8, (a0)
-; ZVFH32-NEXT: csrr a1, vlenb
-; ZVFH32-NEXT: slli a2, a1, 3
-; ZVFH32-NEXT: add a2, a0, a2
-; ZVFH32-NEXT: slli a1, a1, 2
-; ZVFH32-NEXT: addi a1, a1, -1
-; ZVFH32-NEXT: li a3, 63
-; ZVFH32-NEXT: vs8r.v v16, (a2)
-; ZVFH32-NEXT: bltu a1, a3, .LBB125_2
-; ZVFH32-NEXT: # %bb.1:
-; ZVFH32-NEXT: li a1, 63
-; ZVFH32-NEXT: .LBB125_2:
-; ZVFH32-NEXT: slli a1, a1, 1
-; ZVFH32-NEXT: add a0, a0, a1
-; ZVFH32-NEXT: vl8re16.v v8, (a0)
-; ZVFH32-NEXT: addi sp, s0, -80
-; ZVFH32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFH32-NEXT: addi sp, sp, 80
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv32bf16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: addi sp, sp, -80
-; ZVFH64-NEXT: .cfi_def_cfa_offset 80
-; ZVFH64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFH64-NEXT: .cfi_offset ra, -8
-; ZVFH64-NEXT: .cfi_offset s0, -16
-; ZVFH64-NEXT: addi s0, sp, 80
-; ZVFH64-NEXT: .cfi_def_cfa s0, 0
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 4
-; ZVFH64-NEXT: sub sp, sp, a0
-; ZVFH64-NEXT: andi sp, sp, -64
-; ZVFH64-NEXT: addi a0, sp, 64
-; ZVFH64-NEXT: vs8r.v v8, (a0)
-; ZVFH64-NEXT: csrr a1, vlenb
-; ZVFH64-NEXT: slli a2, a1, 3
-; ZVFH64-NEXT: add a2, a0, a2
-; ZVFH64-NEXT: slli a1, a1, 2
-; ZVFH64-NEXT: addi a1, a1, -1
-; ZVFH64-NEXT: li a3, 63
-; ZVFH64-NEXT: vs8r.v v16, (a2)
-; ZVFH64-NEXT: bltu a1, a3, .LBB125_2
-; ZVFH64-NEXT: # %bb.1:
-; ZVFH64-NEXT: li a1, 63
-; ZVFH64-NEXT: .LBB125_2:
-; ZVFH64-NEXT: slli a1, a1, 1
-; ZVFH64-NEXT: add a0, a0, a1
-; ZVFH64-NEXT: vl8re16.v v8, (a0)
-; ZVFH64-NEXT: addi sp, s0, -80
-; ZVFH64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFH64-NEXT: addi sp, sp, 80
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv32bf16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -63
+; CHECK-NEXT: li a1, 63
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: ret
%res = call <vscale x 32 x bfloat> @llvm.vector.splice.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b, i32 63)
ret <vscale x 32 x bfloat> %res
}
@@ -2760,229 +1873,45 @@ define <vscale x 1 x half> @splice_nxv1f16_offset_zero(<vscale x 1 x half> %a, <
}
define <vscale x 1 x half> @splice_nxv1f16_offset_negone(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv1f16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: srli a1, a1, 2
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv1f16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: srli a1, a1, 2
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv1f16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 3
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vslideup.vi v8, v9, 1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv1f16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 3
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vslideup.vi v8, v9, 1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv1f16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: ret
%res = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, i32 -1)
ret <vscale x 1 x half> %res
}
define <vscale x 1 x half> @splice_nxv1f16_offset_min(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv1f16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: srli a1, a1, 2
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 4
-; ZVFHMIN64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB128_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 4
-; ZVFHMIN64-NEXT: .LBB128_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv1f16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: srli a1, a1, 2
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 4
-; ZVFHMIN32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB128_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 4
-; ZVFHMIN32-NEXT: .LBB128_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv1f16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 3
-; ZVFH32-NEXT: addi a0, a0, -2
-; ZVFH32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v9, 2
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv1f16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 3
-; ZVFH64-NEXT: addi a0, a0, -2
-; ZVFH64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v9, 2
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv1f16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -2
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 2
+; CHECK-NEXT: ret
%res = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, i32 -2)
ret <vscale x 1 x half> %res
}
define <vscale x 1 x half> @splice_nxv1f16_offset_max(<vscale x 1 x half> %a, <vscale x 1 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv1f16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: srli a2, a1, 2
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: srli a1, a1, 3
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 1
-; ZVFHMIN64-NEXT: vse16.v v9, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB129_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 1
-; ZVFHMIN64-NEXT: .LBB129_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv1f16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFHMIN32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: srli a2, a1, 2
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: srli a1, a1, 3
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 1
-; ZVFHMIN32-NEXT: vse16.v v9, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB129_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 1
-; ZVFHMIN32-NEXT: .LBB129_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv1f16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 3
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
-; ZVFH32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v9, a0
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv1f16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 3
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
-; ZVFH64-NEXT: vslidedown.vi v8, v8, 1
-; ZVFH64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v9, a0
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv1f16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
%res = call <vscale x 1 x half> @llvm.vector.splice.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, i32 1)
ret <vscale x 1 x half> %res
}
@@ -2998,229 +1927,45 @@ define <vscale x 2 x half> @splice_nxv2f16_offset_zero(<vscale x 2 x half> %a, <
}
define <vscale x 2 x half> @splice_nxv2f16_offset_negone(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv2f16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: srli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv2f16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: srli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv2f16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 2
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vslideup.vi v8, v9, 1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv2f16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 2
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vslideup.vi v8, v9, 1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv2f16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -1)
ret <vscale x 2 x half> %res
}
define <vscale x 2 x half> @splice_nxv2f16_offset_min(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv2f16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: srli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 8
-; ZVFHMIN64-NEXT: vse16.v v9, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB132_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 8
-; ZVFHMIN64-NEXT: .LBB132_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv2f16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: srli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 8
-; ZVFHMIN32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB132_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 8
-; ZVFHMIN32-NEXT: .LBB132_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv2f16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 2
-; ZVFH32-NEXT: addi a0, a0, -4
-; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v9, 4
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv2f16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 2
-; ZVFH64-NEXT: addi a0, a0, -4
-; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v9, 4
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv2f16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -4
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 -4)
ret <vscale x 2 x half> %res
}
define <vscale x 2 x half> @splice_nxv2f16_offset_max(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv2f16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN64-NEXT: vse16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: srli a2, a1, 1
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: srli a1, a1, 2
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 3
-; ZVFHMIN64-NEXT: vse16.v v9, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB133_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 3
-; ZVFHMIN64-NEXT: .LBB133_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv2f16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFHMIN32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: srli a2, a1, 1
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: srli a1, a1, 2
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 3
-; ZVFHMIN32-NEXT: vse16.v v9, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB133_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 3
-; ZVFHMIN32-NEXT: .LBB133_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv2f16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 2
-; ZVFH32-NEXT: addi a0, a0, -3
-; ZVFH32-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH32-NEXT: vslidedown.vi v8, v8, 3
-; ZVFH32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v9, a0
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv2f16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 2
-; ZVFH64-NEXT: addi a0, a0, -3
-; ZVFH64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
-; ZVFH64-NEXT: vslidedown.vi v8, v8, 3
-; ZVFH64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v9, a0
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv2f16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -3
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
%res = call <vscale x 2 x half> @llvm.vector.splice.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i32 3)
ret <vscale x 2 x half> %res
}
@@ -3236,229 +1981,45 @@ define <vscale x 4 x half> @splice_nxv4f16_offset_zero(<vscale x 4 x half> %a, <
}
define <vscale x 4 x half> @splice_nxv4f16_offset_negone(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv4f16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 1
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vs1r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vs1r.v v9, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vl1re16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 1
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv4f16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 1
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vs1r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vs1r.v v9, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vl1re16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 1
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv4f16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 1
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vslideup.vi v8, v9, 1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv4f16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 1
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vslideup.vi v8, v9, 1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv4f16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -1)
ret <vscale x 4 x half> %res
}
define <vscale x 4 x half> @splice_nxv4f16_offset_min(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv4f16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 1
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vs1r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 16
-; ZVFHMIN64-NEXT: vs1r.v v9, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB136_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 16
-; ZVFHMIN64-NEXT: .LBB136_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vl1re16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 1
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv4f16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 1
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vs1r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 16
-; ZVFHMIN32-NEXT: vs1r.v v9, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB136_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 16
-; ZVFHMIN32-NEXT: .LBB136_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vl1re16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 1
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv4f16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 1
-; ZVFH32-NEXT: addi a0, a0, -8
-; ZVFH32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v9, 8
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv4f16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 1
-; ZVFH64-NEXT: addi a0, a0, -8
-; ZVFH64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v9, 8
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv4f16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 8
+; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 -8)
ret <vscale x 4 x half> %res
}
define <vscale x 4 x half> @splice_nxv4f16_offset_max(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv4f16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 1
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vs1r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: add a2, a0, a1
-; ZVFHMIN64-NEXT: srli a1, a1, 1
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 7
-; ZVFHMIN64-NEXT: vs1r.v v9, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB137_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 7
-; ZVFHMIN64-NEXT: .LBB137_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vl1re16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 1
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv4f16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 1
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vs1r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: add a2, a0, a1
-; ZVFHMIN32-NEXT: srli a1, a1, 1
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 7
-; ZVFHMIN32-NEXT: vs1r.v v9, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB137_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 7
-; ZVFHMIN32-NEXT: .LBB137_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vl1re16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 1
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv4f16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: srli a0, a0, 1
-; ZVFH32-NEXT: addi a0, a0, -7
-; ZVFH32-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFH32-NEXT: vslidedown.vi v8, v8, 7
-; ZVFH32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v9, a0
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv4f16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: srli a0, a0, 1
-; ZVFH64-NEXT: addi a0, a0, -7
-; ZVFH64-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; ZVFH64-NEXT: vslidedown.vi v8, v8, 7
-; ZVFH64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v9, a0
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv4f16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -7
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 7
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
%res = call <vscale x 4 x half> @llvm.vector.splice.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i32 7)
ret <vscale x 4 x half> %res
}
@@ -3474,229 +2035,43 @@ define <vscale x 8 x half> @splice_nxv8f16_offset_zero(<vscale x 8 x half> %a, <
}
define <vscale x 8 x half> @splice_nxv8f16_offset_negone(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv8f16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 2
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vs2r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vs2r.v v10, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vl2re16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 2
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv8f16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 2
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vs2r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vs2r.v v10, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vl2re16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 2
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv8f16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetivli zero, 1, e16, m2, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v10, 1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv8f16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetivli zero, 1, e16, m2, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v10, 1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv8f16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -1)
ret <vscale x 8 x half> %res
}
define <vscale x 8 x half> @splice_nxv8f16_offset_min(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv8f16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 2
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vs2r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 32
-; ZVFHMIN64-NEXT: vs2r.v v10, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB140_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 32
-; ZVFHMIN64-NEXT: .LBB140_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vl2re16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 2
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv8f16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 2
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vs2r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 32
-; ZVFHMIN32-NEXT: vs2r.v v10, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB140_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 32
-; ZVFHMIN32-NEXT: .LBB140_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vl2re16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 2
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv8f16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: addi a0, a0, -16
-; ZVFH32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v10, 16
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv8f16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: addi a0, a0, -16
-; ZVFH64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v10, 16
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv8f16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -16
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 16
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 -16)
ret <vscale x 8 x half> %res
}
define <vscale x 8 x half> @splice_nxv8f16_offset_max(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv8f16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -16
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 2
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN64-NEXT: addi a0, sp, 16
-; ZVFHMIN64-NEXT: vs2r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a2, a1, 1
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 15
-; ZVFHMIN64-NEXT: vs2r.v v10, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB141_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 15
-; ZVFHMIN64-NEXT: .LBB141_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vl2re16.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 2
-; ZVFHMIN64-NEXT: add sp, sp, a0
-; ZVFHMIN64-NEXT: addi sp, sp, 16
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv8f16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -16
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 2
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; ZVFHMIN32-NEXT: addi a0, sp, 16
-; ZVFHMIN32-NEXT: vs2r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a2, a1, 1
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 15
-; ZVFHMIN32-NEXT: vs2r.v v10, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB141_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 15
-; ZVFHMIN32-NEXT: .LBB141_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vl2re16.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 2
-; ZVFHMIN32-NEXT: add sp, sp, a0
-; ZVFHMIN32-NEXT: addi sp, sp, 16
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv8f16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: addi a0, a0, -15
-; ZVFH32-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH32-NEXT: vslidedown.vi v8, v8, 15
-; ZVFH32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v10, a0
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv8f16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: addi a0, a0, -15
-; ZVFH64-NEXT: vsetvli zero, a0, e16, m2, ta, ma
-; ZVFH64-NEXT: vslidedown.vi v8, v8, 15
-; ZVFH64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v10, a0
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv8f16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: addi a0, a0, -15
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 15
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.vector.splice.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 15)
ret <vscale x 8 x half> %res
}
@@ -3712,275 +2087,47 @@ define <vscale x 16 x half> @splice_nxv16f16_offset_zero(<vscale x 16 x half> %a
}
define <vscale x 16 x half> @splice_nxv16f16_offset_negone(<vscale x 16 x half> %a, <vscale x 16 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv16f16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -48
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 48
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 3
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -32
-; ZVFHMIN64-NEXT: addi a0, sp, 32
-; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 2
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -48
-; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 48
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv16f16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -48
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 48
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 3
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -32
-; ZVFHMIN32-NEXT: addi a0, sp, 32
-; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 2
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -48
-; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 48
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv16f16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 1
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetivli zero, 1, e16, m4, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v12, 1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv16f16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 1
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetivli zero, 1, e16, m4, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v12, 1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv16f16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v12, 1
+; CHECK-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b, i32 -1)
ret <vscale x 16 x half> %res
}
define <vscale x 16 x half> @splice_nxv16f16_offset_min(<vscale x 16 x half> %a, <vscale x 16 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv16f16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -48
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 48
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 3
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -32
-; ZVFHMIN64-NEXT: addi a0, sp, 32
-; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 2
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 64
-; ZVFHMIN64-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB144_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 64
-; ZVFHMIN64-NEXT: .LBB144_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -48
-; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 48
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv16f16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -48
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 48
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 3
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -32
-; ZVFHMIN32-NEXT: addi a0, sp, 32
-; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 2
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 64
-; ZVFHMIN32-NEXT: vs4r.v v12, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB144_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 64
-; ZVFHMIN32-NEXT: .LBB144_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -48
-; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 48
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv16f16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 1
-; ZVFH32-NEXT: addi a0, a0, -32
-; ZVFH32-NEXT: li a1, 32
-; ZVFH32-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v12, a1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv16f16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 1
-; ZVFH64-NEXT: addi a0, a0, -32
-; ZVFH64-NEXT: li a1, 32
-; ZVFH64-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v12, a1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv16f16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -32
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v12, a1
+; CHECK-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b, i32 -32)
ret <vscale x 16 x half> %res
}
define <vscale x 16 x half> @splice_nxv16f16_offset_max(<vscale x 16 x half> %a, <vscale x 16 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv16f16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -48
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN64-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 48
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 3
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -32
-; ZVFHMIN64-NEXT: addi a0, sp, 32
-; ZVFHMIN64-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a2, a1, 2
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 31
-; ZVFHMIN64-NEXT: vs4r.v v12, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB145_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 31
-; ZVFHMIN64-NEXT: .LBB145_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -48
-; ZVFHMIN64-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 48
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv16f16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -48
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 48
-; ZVFHMIN32-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 48
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 3
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -32
-; ZVFHMIN32-NEXT: addi a0, sp, 32
-; ZVFHMIN32-NEXT: vs4r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a2, a1, 2
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 31
-; ZVFHMIN32-NEXT: vs4r.v v12, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB145_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 31
-; ZVFHMIN32-NEXT: .LBB145_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vl4re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -48
-; ZVFHMIN32-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 48
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv16f16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 1
-; ZVFH32-NEXT: addi a0, a0, -31
-; ZVFH32-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH32-NEXT: vslidedown.vi v8, v8, 31
-; ZVFH32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v12, a0
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv16f16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 1
-; ZVFH64-NEXT: addi a0, a0, -31
-; ZVFH64-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH64-NEXT: vslidedown.vi v8, v8, 31
-; ZVFH64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v12, a0
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv16f16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: addi a0, a0, -31
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 31
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v12, a0
+; CHECK-NEXT: ret
%res = call <vscale x 16 x half> @llvm.vector.splice.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b, i32 31)
ret <vscale x 16 x half> %res
}
@@ -3996,277 +2143,48 @@ define <vscale x 32 x half> @splice_nxv32f16_offset_zero(<vscale x 32 x half> %a
}
define <vscale x 32 x half> @splice_nxv32f16_offset_negone(<vscale x 32 x half> %a, <vscale x 32 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv32f16_offset_negone:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -80
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 80
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 4
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -64
-; ZVFHMIN64-NEXT: addi a0, sp, 64
-; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 3
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN64-NEXT: addi a0, a0, -2
-; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -80
-; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 80
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv32f16_offset_negone:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -80
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 80
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 4
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -64
-; ZVFHMIN32-NEXT: addi a0, sp, 64
-; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 3
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN32-NEXT: addi a0, a0, -2
-; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -80
-; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 80
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv32f16_offset_negone:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 2
-; ZVFH32-NEXT: addi a0, a0, -1
-; ZVFH32-NEXT: vsetivli zero, 1, e16, m8, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; ZVFH32-NEXT: vslideup.vi v8, v16, 1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv32f16_offset_negone:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 2
-; ZVFH64-NEXT: addi a0, a0, -1
-; ZVFH64-NEXT: vsetivli zero, 1, e16, m8, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; ZVFH64-NEXT: vslideup.vi v8, v16, 1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv32f16_offset_negone:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 1
+; CHECK-NEXT: ret
%res = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, i32 -1)
ret <vscale x 32 x half> %res
}
define <vscale x 32 x half> @splice_nxv32f16_offset_min(<vscale x 32 x half> %a, <vscale x 32 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv32f16_offset_min:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -80
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 80
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 4
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -64
-; ZVFHMIN64-NEXT: addi a0, sp, 64
-; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a1, a1, 3
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: li a2, 128
-; ZVFHMIN64-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN64-NEXT: bltu a1, a2, .LBB148_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 128
-; ZVFHMIN64-NEXT: .LBB148_2:
-; ZVFHMIN64-NEXT: sub a0, a0, a1
-; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -80
-; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 80
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv32f16_offset_min:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -80
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 80
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 4
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -64
-; ZVFHMIN32-NEXT: addi a0, sp, 64
-; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a1, a1, 3
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: li a2, 128
-; ZVFHMIN32-NEXT: vs8r.v v16, (a0)
-; ZVFHMIN32-NEXT: bltu a1, a2, .LBB148_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 128
-; ZVFHMIN32-NEXT: .LBB148_2:
-; ZVFHMIN32-NEXT: sub a0, a0, a1
-; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -80
-; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 80
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv32f16_offset_min:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 2
-; ZVFH32-NEXT: addi a0, a0, -64
-; ZVFH32-NEXT: li a1, 64
-; ZVFH32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH32-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v16, a1
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv32f16_offset_min:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 2
-; ZVFH64-NEXT: addi a0, a0, -64
-; ZVFH64-NEXT: li a1, 64
-; ZVFH64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a0
-; ZVFH64-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v16, a1
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv32f16_offset_min:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -64
+; CHECK-NEXT: li a1, 64
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: ret
%res = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, i32 -64)
ret <vscale x 32 x half> %res
}
define <vscale x 32 x half> @splice_nxv32f16_offset_max(<vscale x 32 x half> %a, <vscale x 32 x half> %b) #0 {
-; ZVFHMIN64-LABEL: splice_nxv32f16_offset_max:
-; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: addi sp, sp, -80
-; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN64-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; ZVFHMIN64-NEXT: .cfi_offset ra, -4
-; ZVFHMIN64-NEXT: .cfi_offset s0, -8
-; ZVFHMIN64-NEXT: addi s0, sp, 80
-; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN64-NEXT: csrr a0, vlenb
-; ZVFHMIN64-NEXT: slli a0, a0, 4
-; ZVFHMIN64-NEXT: sub sp, sp, a0
-; ZVFHMIN64-NEXT: andi sp, sp, -64
-; ZVFHMIN64-NEXT: addi a0, sp, 64
-; ZVFHMIN64-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN64-NEXT: csrr a1, vlenb
-; ZVFHMIN64-NEXT: slli a2, a1, 3
-; ZVFHMIN64-NEXT: add a2, a0, a2
-; ZVFHMIN64-NEXT: slli a1, a1, 2
-; ZVFHMIN64-NEXT: addi a1, a1, -1
-; ZVFHMIN64-NEXT: li a3, 63
-; ZVFHMIN64-NEXT: vs8r.v v16, (a2)
-; ZVFHMIN64-NEXT: bltu a1, a3, .LBB149_2
-; ZVFHMIN64-NEXT: # %bb.1:
-; ZVFHMIN64-NEXT: li a1, 63
-; ZVFHMIN64-NEXT: .LBB149_2:
-; ZVFHMIN64-NEXT: slli a1, a1, 1
-; ZVFHMIN64-NEXT: add a0, a0, a1
-; ZVFHMIN64-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN64-NEXT: addi sp, s0, -80
-; ZVFHMIN64-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; ZVFHMIN64-NEXT: addi sp, sp, 80
-; ZVFHMIN64-NEXT: ret
-;
-; ZVFHMIN32-LABEL: splice_nxv32f16_offset_max:
-; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: addi sp, sp, -80
-; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 80
-; ZVFHMIN32-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; ZVFHMIN32-NEXT: .cfi_offset ra, -8
-; ZVFHMIN32-NEXT: .cfi_offset s0, -16
-; ZVFHMIN32-NEXT: addi s0, sp, 80
-; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0
-; ZVFHMIN32-NEXT: csrr a0, vlenb
-; ZVFHMIN32-NEXT: slli a0, a0, 4
-; ZVFHMIN32-NEXT: sub sp, sp, a0
-; ZVFHMIN32-NEXT: andi sp, sp, -64
-; ZVFHMIN32-NEXT: addi a0, sp, 64
-; ZVFHMIN32-NEXT: vs8r.v v8, (a0)
-; ZVFHMIN32-NEXT: csrr a1, vlenb
-; ZVFHMIN32-NEXT: slli a2, a1, 3
-; ZVFHMIN32-NEXT: add a2, a0, a2
-; ZVFHMIN32-NEXT: slli a1, a1, 2
-; ZVFHMIN32-NEXT: addi a1, a1, -1
-; ZVFHMIN32-NEXT: li a3, 63
-; ZVFHMIN32-NEXT: vs8r.v v16, (a2)
-; ZVFHMIN32-NEXT: bltu a1, a3, .LBB149_2
-; ZVFHMIN32-NEXT: # %bb.1:
-; ZVFHMIN32-NEXT: li a1, 63
-; ZVFHMIN32-NEXT: .LBB149_2:
-; ZVFHMIN32-NEXT: slli a1, a1, 1
-; ZVFHMIN32-NEXT: add a0, a0, a1
-; ZVFHMIN32-NEXT: vl8re16.v v8, (a0)
-; ZVFHMIN32-NEXT: addi sp, s0, -80
-; ZVFHMIN32-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; ZVFHMIN32-NEXT: addi sp, sp, 80
-; ZVFHMIN32-NEXT: ret
-;
-; ZVFH32-LABEL: splice_nxv32f16_offset_max:
-; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: csrr a0, vlenb
-; ZVFH32-NEXT: slli a0, a0, 2
-; ZVFH32-NEXT: addi a0, a0, -63
-; ZVFH32-NEXT: li a1, 63
-; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH32-NEXT: vslidedown.vx v8, v8, a1
-; ZVFH32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; ZVFH32-NEXT: vslideup.vx v8, v16, a0
-; ZVFH32-NEXT: ret
-;
-; ZVFH64-LABEL: splice_nxv32f16_offset_max:
-; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: csrr a0, vlenb
-; ZVFH64-NEXT: slli a0, a0, 2
-; ZVFH64-NEXT: addi a0, a0, -63
-; ZVFH64-NEXT: li a1, 63
-; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH64-NEXT: vslidedown.vx v8, v8, a1
-; ZVFH64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; ZVFH64-NEXT: vslideup.vx v8, v16, a0
-; ZVFH64-NEXT: ret
+; CHECK-LABEL: splice_nxv32f16_offset_max:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: addi a0, a0, -63
+; CHECK-NEXT: li a1, 63
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: ret
%res = call <vscale x 32 x half> @llvm.vector.splice.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, i32 63)
ret <vscale x 32 x half> %res
}
>From 6fb6675587e50c1b37329f7808d3450984f5ce88 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 16 Oct 2024 17:50:24 +0100
Subject: [PATCH 3/3] Remove redundant --check-prefixes
---
llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
index c9cb6dc6397c3c..5460caea196cf8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s
; Tests assume VLEN=128 or vscale_range_min=2.