[llvm] [RISCV] Lower insert_vector_elt on zvfhmin/zvfbfmin (PR #110221)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 23:36:40 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/110221
>From a8e525d83671087ab3c17f62ff9d635b65e73e5a Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 27 Sep 2024 16:52:07 +0800
Subject: [PATCH 1/3] [RISCV] Lower insert_vector_elt on zvfhmin/zvfbfmin
This is the dual of #110144, but it doesn't handle the case where the scalar type is illegal, i.e. when there is no zfhmin/zfbfmin. It looks like softening isn't yet implemented for insert_vector_elt operands, and it will crash during type legalization, so I've left that configuration out of the tests.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 22 +-
llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 584 ++++++++++++++++----
2 files changed, 497 insertions(+), 109 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b70884d49642ce..41c55f69e33887 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1076,9 +1076,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::VP_SINT_TO_FP,
ISD::VP_UINT_TO_FP},
VT, Custom);
- setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
- ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE,
- ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE},
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_REVERSE},
VT, Custom);
MVT EltVT = VT.getVectorElementType();
if (isTypeLegal(EltVT))
@@ -8756,8 +8757,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
SDValue Vec = Op.getOperand(0);
SDValue Val = Op.getOperand(1);
+ MVT ValVT = Val.getSimpleValueType();
SDValue Idx = Op.getOperand(2);
if (VecVT.getVectorElementType() == MVT::i1) {
@@ -8769,6 +8772,17 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
}
+ if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
+ ValVT == MVT::bf16) {
+ // If we don't have vfmv.s.f for f16/bf16, insert into fmv.x.h first
+ MVT IntVT = VecVT.changeTypeToInteger();
+ // SDValue IntVal = DAG.getBitcast(IntVT.getVectorElementType(), Val);
+ SDValue IntInsert = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
+ DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
+ return DAG.getBitcast(VecVT, IntInsert);
+ }
+
MVT ContainerVT = VecVT;
// If the operand is a fixed-length vector, convert to a scalable one.
if (VecVT.isFixedLengthVector()) {
@@ -8812,8 +8826,6 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
AlignedIdx);
}
- MVT XLenVT = Subtarget.getXLenVT();
-
bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
// Even i64-element vectors on RV32 can be lowered without scalar
// legalization if the most-significant 32 bits of the value are not affected
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
index 8cfa88e6f95697..607e0085c3f468 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
@@ -1,209 +1,585 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv1f16_0:
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 1 x half> %v, half %elt, i32 0
- ret <vscale x 1 x half> %r
+ %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 1 x bfloat> %r
}
-define <vscale x 1 x half> @insertelt_nxv1f16_imm(<vscale x 1 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv1f16_imm:
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_imm:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 3
; CHECK-NEXT: ret
- %r = insertelement <vscale x 1 x half> %v, half %elt, i32 3
- ret <vscale x 1 x half> %r
+ %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 1 x bfloat> %r
}
-define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv1f16_idx:
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv1bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: fmv.x.h a2, fa0
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 1 x half> %v, half %elt, i32 %idx
- ret <vscale x 1 x half> %r
+ %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 1 x bfloat> %r
}
-define <vscale x 2 x half> @insertelt_nxv2f16_0(<vscale x 2 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv2f16_0:
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 2 x half> %v, half %elt, i32 0
- ret <vscale x 2 x half> %r
+ %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 2 x bfloat> %r
}
-define <vscale x 2 x half> @insertelt_nxv2f16_imm(<vscale x 2 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv2f16_imm:
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_imm:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 3
; CHECK-NEXT: ret
- %r = insertelement <vscale x 2 x half> %v, half %elt, i32 3
- ret <vscale x 2 x half> %r
+ %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 2 x bfloat> %r
}
-define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv2f16_idx:
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv2bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: fmv.x.h a2, fa0
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 2 x half> %v, half %elt, i32 %idx
- ret <vscale x 2 x half> %r
+ %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 2 x bfloat> %r
}
-define <vscale x 4 x half> @insertelt_nxv4f16_0(<vscale x 4 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv4f16_0:
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 4 x half> %v, half %elt, i32 0
- ret <vscale x 4 x half> %r
+ %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 4 x bfloat> %r
}
-define <vscale x 4 x half> @insertelt_nxv4f16_imm(<vscale x 4 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv4f16_imm:
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_imm:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vslideup.vi v8, v9, 3
; CHECK-NEXT: ret
- %r = insertelement <vscale x 4 x half> %v, half %elt, i32 3
- ret <vscale x 4 x half> %r
+ %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 4 x bfloat> %r
}
-define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv4f16_idx:
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv4bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: fmv.x.h a2, fa0
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 4 x half> %v, half %elt, i32 %idx
- ret <vscale x 4 x half> %r
+ %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 4 x bfloat> %r
}
-define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv8f16_0:
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 8 x half> %v, half %elt, i32 0
- ret <vscale x 8 x half> %r
+ %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 8 x bfloat> %r
}
-define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv8f16_imm:
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_imm:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
- %r = insertelement <vscale x 8 x half> %v, half %elt, i32 3
- ret <vscale x 8 x half> %r
+ %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 8 x bfloat> %r
}
-define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv8f16_idx:
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv8bf16_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 8 x half> %v, half %elt, i32 %idx
- ret <vscale x 8 x half> %r
+ %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 8 x bfloat> %r
}
-define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv16f16_0:
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 16 x half> %v, half %elt, i32 0
- ret <vscale x 16 x half> %r
+ %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 16 x bfloat> %r
}
-define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv16f16_imm:
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_imm:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
+; CHECK-NEXT: vmv.s.x v12, a0
; CHECK-NEXT: vslideup.vi v8, v12, 3
; CHECK-NEXT: ret
- %r = insertelement <vscale x 16 x half> %v, half %elt, i32 3
- ret <vscale x 16 x half> %r
+ %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 16 x bfloat> %r
}
-define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv16f16_idx:
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv16bf16_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v12, a1
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma
; CHECK-NEXT: vslideup.vx v8, v12, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 16 x half> %v, half %elt, i32 %idx
- ret <vscale x 16 x half> %r
+ %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 16 x bfloat> %r
}
-define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv32f16_0:
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
- %r = insertelement <vscale x 32 x half> %v, half %elt, i32 0
- ret <vscale x 32 x half> %r
+ %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 32 x bfloat> %r
}
-define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, half %elt) {
-; CHECK-LABEL: insertelt_nxv32f16_imm:
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_imm:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
+; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vslideup.vi v8, v16, 3
; CHECK-NEXT: ret
- %r = insertelement <vscale x 32 x half> %v, half %elt, i32 3
- ret <vscale x 32 x half> %r
+ %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 32 x bfloat> %r
}
-define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv32f16_idx:
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv32bf16_idx:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v16, fa0
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a1
; CHECK-NEXT: addi a1, a0, 1
; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
+ %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 32 x bfloat> %r
+}
+
+define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv1f16_0:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1f16_0:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 1 x half> %v, half %elt, i32 0
+ ret <vscale x 1 x half> %r
+}
+
+define <vscale x 1 x half> @insertelt_nxv1f16_imm(<vscale x 1 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv1f16_imm:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vslideup.vi v8, v9, 3
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1f16_imm:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a0
+; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 1 x half> %v, half %elt, i32 3
+ ret <vscale x 1 x half> %r
+}
+
+define <vscale x 1 x half> @insertelt_nxv1f16_idx(<vscale x 1 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv1f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi a1, a0, 1
+; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFH-NEXT: vslideup.vx v8, v9, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv1f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi a1, a0, 1
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a2
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 1 x half> %v, half %elt, i32 %idx
+ ret <vscale x 1 x half> %r
+}
+
+define <vscale x 2 x half> @insertelt_nxv2f16_0(<vscale x 2 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv2f16_0:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2f16_0:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 2 x half> %v, half %elt, i32 0
+ ret <vscale x 2 x half> %r
+}
+
+define <vscale x 2 x half> @insertelt_nxv2f16_imm(<vscale x 2 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv2f16_imm:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vslideup.vi v8, v9, 3
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2f16_imm:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a0
+; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 2 x half> %v, half %elt, i32 3
+ ret <vscale x 2 x half> %r
+}
+
+define <vscale x 2 x half> @insertelt_nxv2f16_idx(<vscale x 2 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv2f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi a1, a0, 1
+; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFH-NEXT: vslideup.vx v8, v9, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv2f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi a1, a0, 1
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a2
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 2 x half> %v, half %elt, i32 %idx
+ ret <vscale x 2 x half> %r
+}
+
+define <vscale x 4 x half> @insertelt_nxv4f16_0(<vscale x 4 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv4f16_0:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4f16_0:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 4 x half> %v, half %elt, i32 0
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @insertelt_nxv4f16_imm(<vscale x 4 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv4f16_imm:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vslideup.vi v8, v9, 3
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4f16_imm:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a0
+; ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 4 x half> %v, half %elt, i32 3
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @insertelt_nxv4f16_idx(<vscale x 4 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv4f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi a1, a0, 1
+; ZVFH-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v9, fa0
+; ZVFH-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; ZVFH-NEXT: vslideup.vx v8, v9, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv4f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi a1, a0, 1
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v9, a2
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v9, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 4 x half> %v, half %elt, i32 %idx
+ ret <vscale x 4 x half> %r
+}
+
+define <vscale x 8 x half> @insertelt_nxv8f16_0(<vscale x 8 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv8f16_0:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8f16_0:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 8 x half> %v, half %elt, i32 0
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @insertelt_nxv8f16_imm(<vscale x 8 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv8f16_imm:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v10, fa0
+; ZVFH-NEXT: vslideup.vi v8, v10, 3
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8f16_imm:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v10, a0
+; ZVFHMIN-NEXT: vslideup.vi v8, v10, 3
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 8 x half> %v, half %elt, i32 3
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @insertelt_nxv8f16_idx(<vscale x 8 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv8f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v10, fa0
+; ZVFH-NEXT: addi a1, a0, 1
+; ZVFH-NEXT: vsetvli zero, a1, e16, m2, tu, ma
+; ZVFH-NEXT: vslideup.vx v8, v10, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv8f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v10, a1
+; ZVFHMIN-NEXT: addi a1, a0, 1
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m2, tu, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v10, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 8 x half> %v, half %elt, i32 %idx
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 16 x half> @insertelt_nxv16f16_0(<vscale x 16 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv16f16_0:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16f16_0:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 16 x half> %v, half %elt, i32 0
+ ret <vscale x 16 x half> %r
+}
+
+define <vscale x 16 x half> @insertelt_nxv16f16_imm(<vscale x 16 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv16f16_imm:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v12, fa0
+; ZVFH-NEXT: vslideup.vi v8, v12, 3
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16f16_imm:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v12, a0
+; ZVFHMIN-NEXT: vslideup.vi v8, v12, 3
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 16 x half> %v, half %elt, i32 3
+ ret <vscale x 16 x half> %r
+}
+
+define <vscale x 16 x half> @insertelt_nxv16f16_idx(<vscale x 16 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv16f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v12, fa0
+; ZVFH-NEXT: addi a1, a0, 1
+; ZVFH-NEXT: vsetvli zero, a1, e16, m4, tu, ma
+; ZVFH-NEXT: vslideup.vx v8, v12, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv16f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v12, a1
+; ZVFHMIN-NEXT: addi a1, a0, 1
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, tu, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v12, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 16 x half> %v, half %elt, i32 %idx
+ ret <vscale x 16 x half> %r
+}
+
+define <vscale x 32 x half> @insertelt_nxv32f16_0(<vscale x 32 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv32f16_0:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32f16_0:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 32 x half> %v, half %elt, i32 0
+ ret <vscale x 32 x half> %r
+}
+
+define <vscale x 32 x half> @insertelt_nxv32f16_imm(<vscale x 32 x half> %v, half %elt) {
+; ZVFH-LABEL: insertelt_nxv32f16_imm:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFH-NEXT: vfmv.s.f v16, fa0
+; ZVFH-NEXT: vslideup.vi v8, v16, 3
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32f16_imm:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v16, a0
+; ZVFHMIN-NEXT: vslideup.vi v8, v16, 3
+; ZVFHMIN-NEXT: ret
+ %r = insertelement <vscale x 32 x half> %v, half %elt, i32 3
+ ret <vscale x 32 x half> %r
+}
+
+define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, half %elt, i32 zeroext %idx) {
+; ZVFH-LABEL: insertelt_nxv32f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfmv.s.f v16, fa0
+; ZVFH-NEXT: addi a1, a0, 1
+; ZVFH-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; ZVFH-NEXT: vslideup.vx v8, v16, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: insertelt_nxv32f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v16, a1
+; ZVFHMIN-NEXT: addi a1, a0, 1
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v16, a0
+; ZVFHMIN-NEXT: ret
%r = insertelement <vscale x 32 x half> %v, half %elt, i32 %idx
ret <vscale x 32 x half> %r
}
>From 1112dd87019b3351d021f021fd63a3de029867b4 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 30 Sep 2024 12:00:15 +0800
Subject: [PATCH 2/3] Remove extraneous comment, adjust other comment
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 41c55f69e33887..4f77bba2968988 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8774,9 +8774,8 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
ValVT == MVT::bf16) {
- // If we don't have vfmv.s.f for f16/bf16, insert into fmv.x.h first
+ // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
MVT IntVT = VecVT.changeTypeToInteger();
- // SDValue IntVal = DAG.getBitcast(IntVT.getVectorElementType(), Val);
SDValue IntInsert = DAG.getNode(
ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
>From b4716f505901b402aa36a69200aa1aa548f2b1dc Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 2 Oct 2024 14:34:01 +0800
Subject: [PATCH 3/3] Move bfloat below half to tame diff
---
llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll | 444 ++++++++++----------
1 file changed, 222 insertions(+), 222 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
index 607e0085c3f468..9b7a2600bcf0b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll
@@ -8,228 +8,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv1bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
- ret <vscale x 1 x bfloat> %r
-}
-
-define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv1bf16_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vslideup.vi v8, v9, 3
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
- ret <vscale x 1 x bfloat> %r
-}
-
-define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv1bf16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: fmv.x.h a2, fa0
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
- ret <vscale x 1 x bfloat> %r
-}
-
-define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv2bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
- ret <vscale x 2 x bfloat> %r
-}
-
-define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv2bf16_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vslideup.vi v8, v9, 3
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
- ret <vscale x 2 x bfloat> %r
-}
-
-define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv2bf16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: fmv.x.h a2, fa0
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
- ret <vscale x 2 x bfloat> %r
-}
-
-define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv4bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
- ret <vscale x 4 x bfloat> %r
-}
-
-define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv4bf16_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vslideup.vi v8, v9, 3
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
- ret <vscale x 4 x bfloat> %r
-}
-
-define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv4bf16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: fmv.x.h a2, fa0
-; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v9, a2
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
- ret <vscale x 4 x bfloat> %r
-}
-
-define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv8bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
- ret <vscale x 8 x bfloat> %r
-}
-
-define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv8bf16_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vslideup.vi v8, v10, 3
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
- ret <vscale x 8 x bfloat> %r
-}
-
-define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv8bf16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a1, fa0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
- ret <vscale x 8 x bfloat> %r
-}
-
-define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv16bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
- ret <vscale x 16 x bfloat> %r
-}
-
-define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv16bf16_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vslideup.vi v8, v12, 3
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
- ret <vscale x 16 x bfloat> %r
-}
-
-define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv16bf16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a1, fa0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v12, a1
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v12, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
- ret <vscale x 16 x bfloat> %r
-}
-
-define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv32bf16_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
- ret <vscale x 32 x bfloat> %r
-}
-
-define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
-; CHECK-LABEL: insertelt_nxv32bf16_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vslideup.vi v8, v16, 3
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
- ret <vscale x 32 x bfloat> %r
-}
-
-define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
-; CHECK-LABEL: insertelt_nxv32bf16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fmv.x.h a1, fa0
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a1
-; CHECK-NEXT: addi a1, a0, 1
-; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a0
-; CHECK-NEXT: ret
- %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
- ret <vscale x 32 x bfloat> %r
-}
-
define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
; ZVFH-LABEL: insertelt_nxv1f16_0:
; ZVFH: # %bb.0:
@@ -584,6 +362,228 @@ define <vscale x 32 x half> @insertelt_nxv32f16_idx(<vscale x 32 x half> %v, hal
ret <vscale x 32 x half> %r
}
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_0(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv1bf16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 1 x bfloat> @insertelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv1bf16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: fmv.x.h a2, fa0
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a2
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 1 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_0(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv2bf16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @insertelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv2bf16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: fmv.x.h a2, fa0
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a2
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 2 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_0(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv4bf16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @insertelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv4bf16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: fmv.x.h a2, fa0
+; CHECK-NEXT: vsetvli a3, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a2
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 4 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_0(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv8bf16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v10, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 3
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @insertelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv8bf16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 8 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_0(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv16bf16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v12, a0
+; CHECK-NEXT: vslideup.vi v8, v12, 3
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @insertelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv16bf16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v12, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v12, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 16 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_0(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 0
+ ret <vscale x 32 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v, bfloat %elt) {
+; CHECK-LABEL: insertelt_nxv32bf16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: vslideup.vi v8, v16, 3
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 3
+ ret <vscale x 32 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @insertelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, bfloat %elt, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_nxv32bf16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a1, fa0
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a1
+; CHECK-NEXT: addi a1, a0, 1
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: ret
+ %r = insertelement <vscale x 32 x bfloat> %v, bfloat %elt, i32 %idx
+ ret <vscale x 32 x bfloat> %r
+}
+
define <vscale x 1 x float> @insertelt_nxv1f32_0(<vscale x 1 x float> %v, float %elt) {
; CHECK-LABEL: insertelt_nxv1f32_0:
; CHECK: # %bb.0:
More information about the llvm-commits mailing list