[llvm] [RISCV] Handle f16/bf16 extract_vector_elt when scalar type is legal (PR #110144)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 26 09:39:36 PDT 2024


https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/110144

When the scalar type is illegal, it gets softened during type legalization and gets lowered as an integer.

However with zfhmin/zfbfmin the type is now legal, so it passes through type legalization unchanged and then crashes during lowering because we didn't have any custom lowering or patterns for it.

This handles that case via the existing custom lowering to a vslidedown and vfmv.f.s.
It also handles the case where we only have zvfhmin/zvfbfmin and thus no vfmv.f.s, in which case we need to extract the element to a GPR and then use fmv.h.x.

Fixes #110126


>From fd141988146eb19880dae354af3e5ca6ca92328b Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 27 Sep 2024 00:18:55 +0800
Subject: [PATCH] [RISCV] Handle f16/bf16 extract_vector_elt when scalar type
 is legal

When the scalar type is illegal, it gets softened during type legalization and gets lowered as an integer.

However with zfhmin/zfbfmin the type is now legal and it passes through type legalization where it crashes because we didn't have any custom lowering or patterns for it.

This handles said case via the existing custom lowering to a vslidedown and vfmv.f.s.
It also handles the case where we only have zvfhmin/zvfbfmin and don't have vfmv.f.s, in which case we need to extract it to a GPR and then use fmv.h.x.

Fixes #110126
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp  |  15 +-
 llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll | 922 ++++++++++++++++---
 2 files changed, 823 insertions(+), 114 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7a19a879ca3420..d52b802bdd52be 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1082,8 +1082,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                          VT, Custom);
       MVT EltVT = VT.getVectorElementType();
       if (isTypeLegal(EltVT))
-        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT}, VT,
-                           Custom);
+        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+                            ISD::EXTRACT_VECTOR_ELT},
+                           VT, Custom);
       else
         setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
                            EltVT, Custom);
@@ -8990,6 +8991,16 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
   }
 
+  if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
+      EltVT == MVT::bf16) {
+    // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
+    MVT IntVT = VecVT.changeTypeToInteger();
+    SDValue IntVec = DAG.getBitcast(IntVT, Vec);
+    SDValue IntExtract =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
+    return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
+  }
+
   // If this is a fixed vector, we need to convert it to a scalable vector.
   MVT ContainerVT = VecVT;
   if (VecVT.isFixedLengthVector()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
index 209a37bf66ae34..86ef78be97afb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
@@ -1,197 +1,895 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,NOZFMIN,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,NOZFMIN,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZFMIN
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfhmin,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZFMIN
+
+define bfloat @extractelt_nxv1bf16_0(<vscale x 1 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv1bf16_0:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv1bf16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 1 x bfloat> %v, i32 0
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv1bf16_imm(<vscale x 1 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv1bf16_imm:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv1bf16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 1 x bfloat> %v, i32 2
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv1bf16_idx(<vscale x 1 x bfloat> %v, i32 zeroext %idx) {
+; NOZFMIN-LABEL: extractelt_nxv1bf16_idx:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv1bf16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 1 x bfloat> %v, i32 %idx
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv2bf16_0(<vscale x 2 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv2bf16_0:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv2bf16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 2 x bfloat> %v, i32 0
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv2bf16_imm(<vscale x 2 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv2bf16_imm:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv2bf16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 2 x bfloat> %v, i32 2
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv2bf16_idx(<vscale x 2 x bfloat> %v, i32 zeroext %idx) {
+; NOZFMIN-LABEL: extractelt_nxv2bf16_idx:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv2bf16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 2 x bfloat> %v, i32 %idx
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv4bf16_0(<vscale x 4 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv4bf16_0:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv4bf16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 4 x bfloat> %v, i32 0
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv4bf16_imm(<vscale x 4 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv4bf16_imm:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv4bf16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 4 x bfloat> %v, i32 2
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv4bf16_idx(<vscale x 4 x bfloat> %v, i32 zeroext %idx) {
+; NOZFMIN-LABEL: extractelt_nxv4bf16_idx:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv4bf16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 4 x bfloat> %v, i32 %idx
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv8bf16_0(<vscale x 8 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv8bf16_0:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv8bf16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 8 x bfloat> %v, i32 0
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv8bf16_imm(<vscale x 8 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv8bf16_imm:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv8bf16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 8 x bfloat> %v, i32 2
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv8bf16_idx(<vscale x 8 x bfloat> %v, i32 zeroext %idx) {
+; NOZFMIN-LABEL: extractelt_nxv8bf16_idx:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv8bf16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 8 x bfloat> %v, i32 %idx
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv16bf16_0(<vscale x 16 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv16bf16_0:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv16bf16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 16 x bfloat> %v, i32 0
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv16bf16_imm(<vscale x 16 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv16bf16_imm:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv16bf16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 16 x bfloat> %v, i32 2
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv16bf16_idx(<vscale x 16 x bfloat> %v, i32 zeroext %idx) {
+; NOZFMIN-LABEL: extractelt_nxv16bf16_idx:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv16bf16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 16 x bfloat> %v, i32 %idx
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv32bf16_0(<vscale x 32 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv32bf16_0:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv32bf16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 32 x bfloat> %v, i32 0
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv32bf16_imm(<vscale x 32 x bfloat> %v) {
+; NOZFMIN-LABEL: extractelt_nxv32bf16_imm:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv32bf16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 32 x bfloat> %v, i32 2
+  ret bfloat %r
+}
+
+define bfloat @extractelt_nxv32bf16_idx(<vscale x 32 x bfloat> %v, i32 zeroext %idx) {
+; NOZFMIN-LABEL: extractelt_nxv32bf16_idx:
+; NOZFMIN:       # %bb.0:
+; NOZFMIN-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; NOZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; NOZFMIN-NEXT:    vmv.x.s a0, v8
+; NOZFMIN-NEXT:    lui a1, 1048560
+; NOZFMIN-NEXT:    or a0, a0, a1
+; NOZFMIN-NEXT:    fmv.w.x fa0, a0
+; NOZFMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv32bf16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
+  %r = extractelement <vscale x 32 x bfloat> %v, i32 %idx
+  ret bfloat %r
+}
 
 define half @extractelt_nxv1f16_0(<vscale x 1 x half> %v) {
-; CHECK-LABEL: extractelt_nxv1f16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv1f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv1f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv1f16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 0
   ret half %r
 }
 
 define half @extractelt_nxv1f16_imm(<vscale x 1 x half> %v) {
-; CHECK-LABEL: extractelt_nxv1f16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv1f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv1f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv1f16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 2
   ret half %r
 }
 
 define half @extractelt_nxv1f16_idx(<vscale x 1 x half> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv1f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv1f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv1f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv1f16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 1 x half> %v, i32 %idx
   ret half %r
 }
 
 define half @extractelt_nxv2f16_0(<vscale x 2 x half> %v) {
-; CHECK-LABEL: extractelt_nxv2f16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv2f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv2f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv2f16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 0
   ret half %r
 }
 
 define half @extractelt_nxv2f16_imm(<vscale x 2 x half> %v) {
-; CHECK-LABEL: extractelt_nxv2f16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv2f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv2f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv2f16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 2
   ret half %r
 }
 
 define half @extractelt_nxv2f16_idx(<vscale x 2 x half> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv2f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv2f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv2f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv2f16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 2 x half> %v, i32 %idx
   ret half %r
 }
 
 define half @extractelt_nxv4f16_0(<vscale x 4 x half> %v) {
-; CHECK-LABEL: extractelt_nxv4f16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv4f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv4f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv4f16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 0
   ret half %r
 }
 
 define half @extractelt_nxv4f16_imm(<vscale x 4 x half> %v) {
-; CHECK-LABEL: extractelt_nxv4f16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv4f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv4f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv4f16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 2
   ret half %r
 }
 
 define half @extractelt_nxv4f16_idx(<vscale x 4 x half> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv4f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv4f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv4f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv4f16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 4 x half> %v, i32 %idx
   ret half %r
 }
 
 define half @extractelt_nxv8f16_0(<vscale x 8 x half> %v) {
-; CHECK-LABEL: extractelt_nxv8f16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv8f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv8f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv8f16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 8 x half> %v, i32 0
   ret half %r
 }
 
 define half @extractelt_nxv8f16_imm(<vscale x 8 x half> %v) {
-; CHECK-LABEL: extractelt_nxv8f16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv8f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv8f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv8f16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 8 x half> %v, i32 2
   ret half %r
 }
 
 define half @extractelt_nxv8f16_idx(<vscale x 8 x half> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv8f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv8f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv8f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv8f16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 8 x half> %v, i32 %idx
   ret half %r
 }
 
 define half @extractelt_nxv16f16_0(<vscale x 16 x half> %v) {
-; CHECK-LABEL: extractelt_nxv16f16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv16f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv16f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv16f16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 16 x half> %v, i32 0
   ret half %r
 }
 
 define half @extractelt_nxv16f16_imm(<vscale x 16 x half> %v) {
-; CHECK-LABEL: extractelt_nxv16f16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv16f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv16f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv16f16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 16 x half> %v, i32 2
   ret half %r
 }
 
 define half @extractelt_nxv16f16_idx(<vscale x 16 x half> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv16f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv16f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv16f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv16f16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m4, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 16 x half> %v, i32 %idx
   ret half %r
 }
 
 define half @extractelt_nxv32f16_0(<vscale x 32 x half> %v) {
-; CHECK-LABEL: extractelt_nxv32f16_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv32f16_0:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv32f16_0:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv32f16_0:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 32 x half> %v, i32 0
   ret half %r
 }
 
 define half @extractelt_nxv32f16_imm(<vscale x 32 x half> %v) {
-; CHECK-LABEL: extractelt_nxv32f16_imm:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 2
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv32f16_imm:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv32f16_imm:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv32f16_imm:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZFMIN-NEXT:    vslidedown.vi v8, v8, 2
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 32 x half> %v, i32 2
   ret half %r
 }
 
 define half @extractelt_nxv32f16_idx(<vscale x 32 x half> %v, i32 zeroext %idx) {
-; CHECK-LABEL: extractelt_nxv32f16_idx:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a0
-; CHECK-NEXT:    vfmv.f.s fa0, v8
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: extractelt_nxv32f16_idx:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; ZVFH-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: extractelt_nxv32f16_idx:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZVFHMIN-NEXT:    vmv.x.s a0, v8
+; ZVFHMIN-NEXT:    lui a1, 1048560
+; ZVFHMIN-NEXT:    or a0, a0, a1
+; ZVFHMIN-NEXT:    fmv.w.x fa0, a0
+; ZVFHMIN-NEXT:    ret
+;
+; ZFMIN-LABEL: extractelt_nxv32f16_idx:
+; ZFMIN:       # %bb.0:
+; ZFMIN-NEXT:    vsetivli zero, 1, e16, m8, ta, ma
+; ZFMIN-NEXT:    vslidedown.vx v8, v8, a0
+; ZFMIN-NEXT:    vmv.x.s a0, v8
+; ZFMIN-NEXT:    fmv.h.x fa0, a0
+; ZFMIN-NEXT:    ret
   %r = extractelement <vscale x 32 x half> %v, i32 %idx
   ret half %r
 }
@@ -636,10 +1334,10 @@ define double @extractelt_nxv16f64_neg1(<vscale x 16 x double> %v) {
 ; RV64-NEXT:    slli a2, a2, 1
 ; RV64-NEXT:    addi a2, a2, -1
 ; RV64-NEXT:    vs8r.v v16, (a3)
-; RV64-NEXT:    bltu a2, a1, .LBB52_2
+; RV64-NEXT:    bltu a2, a1, .LBB70_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:  .LBB52_2:
+; RV64-NEXT:  .LBB70_2:
 ; RV64-NEXT:    slli a2, a2, 3
 ; RV64-NEXT:    add a0, a0, a2
 ; RV64-NEXT:    fld fa0, 0(a0)
@@ -669,10 +1367,10 @@ define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %i
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a2, a1, 1
 ; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    bltu a0, a2, .LBB54_2
+; RV32-NEXT:    bltu a0, a2, .LBB72_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a2
-; RV32-NEXT:  .LBB54_2:
+; RV32-NEXT:  .LBB72_2:
 ; RV32-NEXT:    addi sp, sp, -80
 ; RV32-NEXT:    .cfi_def_cfa_offset 80
 ; RV32-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
@@ -704,10 +1402,10 @@ define double @extractelt_nxv16f64_idx(<vscale x 16 x double> %v, i32 zeroext %i
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    slli a2, a1, 1
 ; RV64-NEXT:    addi a2, a2, -1
-; RV64-NEXT:    bltu a0, a2, .LBB54_2
+; RV64-NEXT:    bltu a0, a2, .LBB72_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a0, a2
-; RV64-NEXT:  .LBB54_2:
+; RV64-NEXT:  .LBB72_2:
 ; RV64-NEXT:    addi sp, sp, -80
 ; RV64-NEXT:    .cfi_def_cfa_offset 80
 ; RV64-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill



More information about the llvm-commits mailing list