[llvm] [RISCV] Lower fixed-length {insert,extract}_vector_elt on zvfhmin/zvfbfmin (PR #114927)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 21:06:00 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
Changes:
RISCVTargetLowering::lower{INSERT,EXTRACT}_VECTOR_ELT already handles f16 and bf16 scalable vectors after #110221, so we can reuse the same lowering for fixed-length vectors.
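For reference, the shape of IR this enables is the one exercised by the new bf16 cases in fixed-vectors-extract.ll below: with +zvfbfmin, a constant-index extract like this now lowers to the vslidedown.vi / vmv.x.s / fmv.h.x sequence shown in the CHECK lines of the diff instead of being expanded.

```llvm
; Same shape as the extractelt_v8bf16 test added in this patch: a
; fixed-length <8 x bfloat> load followed by a constant-index extract.
define bfloat @extractelt_v8bf16(ptr %x) nounwind {
  %a = load <8 x bfloat>, ptr %x              ; loaded with vle16.v
  %b = extractelement <8 x bfloat> %a, i32 7  ; custom-lowered to vslidedown.vi + vmv.x.s
  ret bfloat %b                               ; result moved to fa0 via fmv.h.x
}
```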
---
Patch is 121.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114927.diff
6 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll (+143-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll (+4-44)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+4-44)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll (+103-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll (+1500-852)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d2d03d4572dac8..b48cd1f2427d06 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1333,7 +1333,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// expansion to a build_vector of 0s.
setOperationAction(ISD::UNDEF, VT, Custom);
- setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
+ ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_REVERSE,
ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
VT, Custom);
@@ -1404,10 +1405,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
}
- setOperationAction({ISD::BUILD_VECTOR,
- ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
- ISD::SCALAR_TO_VECTOR},
- VT, Custom);
+ setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
+ Custom);
setOperationAction(
{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index f2052ccc462793..cb830d668d2e8c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64NOM
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64M
+
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64M
define i8 @extractelt_v16i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i8:
@@ -66,14 +69,37 @@ define i64 @extractelt_v2i64(ptr %x) nounwind {
ret i64 %b
}
-define half @extractelt_v8f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v8f16:
+define bfloat @extractelt_v8bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = extractelement <8 x bfloat> %a, i32 7
+ ret bfloat %b
+}
+
+define half @extractelt_v8f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = extractelement <8 x half> %a, i32 7
ret half %b
@@ -171,15 +197,40 @@ define i64 @extractelt_v4i64(ptr %x) nounwind {
ret i64 %b
}
-define half @extractelt_v16f16(ptr %x) nounwind {
-; CHECK-LABEL: extractelt_v16f16:
+define bfloat @extractelt_v16bf16(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v16bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <16 x bfloat>, ptr %x
+ %b = extractelement <16 x bfloat> %a, i32 7
+ ret bfloat %b
+}
+
+define half @extractelt_v16f16(ptr %x) nounwind {
+; ZVFH-LABEL: extractelt_v16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = extractelement <16 x half> %a, i32 7
ret half %b
@@ -398,15 +449,49 @@ define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
ret i64 %c
}
-define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v8f16_idx:
+define bfloat @extractelt_v8bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v8bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v10, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vslidedown.vx v8, v10, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = fadd <8 x bfloat> %a, %a
+ %c = extractelement <8 x bfloat> %b, i32 %idx
+ ret bfloat %c
+}
+
+define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v8f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v8f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
+; ZVFHMIN-NEXT: vslidedown.vx v8, v10, a1
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = fadd <8 x half> %a, %a
%c = extractelement <8 x half> %b, i32 %idx
@@ -513,15 +598,49 @@ define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
ret i64 %c
}
-define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
-; CHECK-LABEL: extractelt_v16f16_idx:
+define bfloat @extractelt_v16bf16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v16bf16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vslidedown.vx v8, v8, a1
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v12, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT: vslidedown.vx v8, v12, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
+ %a = load <16 x bfloat>, ptr %x
+ %b = fadd <16 x bfloat> %a, %a
+ %c = extractelement <16 x bfloat> %b, i32 %idx
+ ret bfloat %c
+}
+
+define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
+; ZVFH-LABEL: extractelt_v16f16_idx:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vle16.v v8, (a0)
+; ZVFH-NEXT: vfadd.vv v8, v8, v8
+; ZVFH-NEXT: vslidedown.vx v8, v8, a1
+; ZVFH-NEXT: vfmv.f.s fa0, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: extractelt_v16f16_idx:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
+; ZVFHMIN-NEXT: vslidedown.vx v8, v12, a1
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: fmv.h.x fa0, a0
+; ZVFHMIN-NEXT: ret
%a = load <16 x half>, ptr %x
%b = fadd <16 x half> %a, %a
%c = extractelement <16 x half> %b, i32 %idx
@@ -939,8 +1058,8 @@ define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_sdiv_v4i32:
; RV32NOM: # %bb.0:
-; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
+; RV32NOM-NEXT: lui a0, %hi(.LCPI46_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT: vle32.v v9, (a0)
; RV32NOM-NEXT: vmulh.vv v9, v8, v9
@@ -975,8 +1094,8 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
;
; RV64NOM-LABEL: extractelt_sdiv_v4i32:
; RV64NOM: # %bb.0:
-; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0)
-; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
+; RV64NOM-NEXT: lui a0, %hi(.LCPI46_0)
+; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI46_0)
; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT: vle32.v v9, (a0)
; RV64NOM-NEXT: vmulh.vv v9, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
index bdedc5f33c3a19..3f7cd91737f4b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
@@ -21,58 +21,18 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
;
; RV32-ZFBFMIN-LABEL: splat_idx_v4bf16:
; RV32-ZFBFMIN: # %bb.0:
-; RV32-ZFBFMIN-NEXT: addi sp, sp, -48
-; RV32-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZFBFMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFBFMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZFBFMIN-NEXT: csrr a1, vlenb
-; RV32-ZFBFMIN-NEXT: sub sp, sp, a1
-; RV32-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFBFMIN-NEXT: addi a1, sp, 32
-; RV32-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFBFMIN-NEXT: andi a0, a0, 3
-; RV32-ZFBFMIN-NEXT: li a1, 2
-; RV32-ZFBFMIN-NEXT: call __mulsi3
-; RV32-ZFBFMIN-NEXT: addi a1, sp, 16
-; RV32-ZFBFMIN-NEXT: add a0, a1, a0
-; RV32-ZFBFMIN-NEXT: addi a2, sp, 32
-; RV32-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV32-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFBFMIN-NEXT: vse16.v v8, (a1)
-; RV32-ZFBFMIN-NEXT: lh a0, 0(a0)
+; RV32-ZFBFMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV32-ZFBFMIN-NEXT: vmv.x.s a0, v8
; RV32-ZFBFMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZFBFMIN-NEXT: csrr a0, vlenb
-; RV32-ZFBFMIN-NEXT: add sp, sp, a0
-; RV32-ZFBFMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFBFMIN-NEXT: addi sp, sp, 48
; RV32-ZFBFMIN-NEXT: ret
;
; RV64-ZFBFMIN-LABEL: splat_idx_v4bf16:
; RV64-ZFBFMIN: # %bb.0:
-; RV64-ZFBFMIN-NEXT: addi sp, sp, -48
-; RV64-ZFBFMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZFBFMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFBFMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZFBFMIN-NEXT: csrr a1, vlenb
-; RV64-ZFBFMIN-NEXT: sub sp, sp, a1
-; RV64-ZFBFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFBFMIN-NEXT: addi a1, sp, 32
-; RV64-ZFBFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFBFMIN-NEXT: andi a0, a0, 3
-; RV64-ZFBFMIN-NEXT: li a1, 2
-; RV64-ZFBFMIN-NEXT: call __muldi3
-; RV64-ZFBFMIN-NEXT: addi a1, sp, 16
-; RV64-ZFBFMIN-NEXT: add a0, a1, a0
-; RV64-ZFBFMIN-NEXT: addi a2, sp, 32
-; RV64-ZFBFMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV64-ZFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFBFMIN-NEXT: vse16.v v8, (a1)
-; RV64-ZFBFMIN-NEXT: lh a0, 0(a0)
+; RV64-ZFBFMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV64-ZFBFMIN-NEXT: vmv.x.s a0, v8
; RV64-ZFBFMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZFBFMIN-NEXT: csrr a0, vlenb
-; RV64-ZFBFMIN-NEXT: add sp, sp, a0
-; RV64-ZFBFMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFBFMIN-NEXT: addi sp, sp, 48
; RV64-ZFBFMIN-NEXT: ret
%x = extractelement <4 x bfloat> %v, i64 %idx
%ins = insertelement <4 x bfloat> poison, bfloat %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 924732e554f0ef..7e219836688406 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -201,58 +201,18 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
;
; RV32-ZFHMIN-LABEL: splat_idx_v4f16:
; RV32-ZFHMIN: # %bb.0:
-; RV32-ZFHMIN-NEXT: addi sp, sp, -48
-; RV32-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV32-ZFHMIN-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-ZFHMIN-NEXT: .cfi_offset ra, -4
-; RV32-ZFHMIN-NEXT: csrr a1, vlenb
-; RV32-ZFHMIN-NEXT: sub sp, sp, a1
-; RV32-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV32-ZFHMIN-NEXT: addi a1, sp, 32
-; RV32-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-ZFHMIN-NEXT: andi a0, a0, 3
-; RV32-ZFHMIN-NEXT: li a1, 2
-; RV32-ZFHMIN-NEXT: call __mulsi3
-; RV32-ZFHMIN-NEXT: addi a1, sp, 16
-; RV32-ZFHMIN-NEXT: add a0, a1, a0
-; RV32-ZFHMIN-NEXT: addi a2, sp, 32
-; RV32-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV32-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-ZFHMIN-NEXT: vse16.v v8, (a1)
-; RV32-ZFHMIN-NEXT: lh a0, 0(a0)
+; RV32-ZFHMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV32-ZFHMIN-NEXT: vmv.x.s a0, v8
; RV32-ZFHMIN-NEXT: vmv.v.x v8, a0
-; RV32-ZFHMIN-NEXT: csrr a0, vlenb
-; RV32-ZFHMIN-NEXT: add sp, sp, a0
-; RV32-ZFHMIN-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-ZFHMIN-NEXT: addi sp, sp, 48
; RV32-ZFHMIN-NEXT: ret
;
; RV64-ZFHMIN-LABEL: splat_idx_v4f16:
; RV64-ZFHMIN: # %bb.0:
-; RV64-ZFHMIN-NEXT: addi sp, sp, -48
-; RV64-ZFHMIN-NEXT: .cfi_def_cfa_offset 48
-; RV64-ZFHMIN-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-ZFHMIN-NEXT: .cfi_offset ra, -8
-; RV64-ZFHMIN-NEXT: csrr a1, vlenb
-; RV64-ZFHMIN-NEXT: sub sp, sp, a1
-; RV64-ZFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
-; RV64-ZFHMIN-NEXT: addi a1, sp, 32
-; RV64-ZFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-ZFHMIN-NEXT: andi a0, a0, 3
-; RV64-ZFHMIN-NEXT: li a1, 2
-; RV64-ZFHMIN-NEXT: call __muldi3
-; RV64-ZFHMIN-NEXT: addi a1, sp, 16
-; RV64-ZFHMIN-NEXT: add a0, a1, a0
-; RV64-ZFHMIN-NEXT: addi a2, sp, 32
-; RV64-ZFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload
; RV64-ZFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-ZFHMIN-NEXT: vse16.v v8, (a1)
-; RV64-ZFHMIN-NEXT: lh a0, 0(a0)
+; RV64-ZFHMIN-NEXT: vslidedown.vx v8, v8, a0
+; RV64-ZFHMIN-NEXT: vmv.x.s a0, v8
; RV64-ZFHMIN-NEXT: vmv.v.x v8, a0
-; RV64-ZFHMIN-NEXT: csrr a0, vlenb
-; RV64-ZFHMIN-NEXT: add sp, sp, a0
-; RV64-ZFHMIN-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-ZFHMIN-NEXT: addi sp, sp, 48
; RV64-ZFHMIN-NEXT: ret
%x = extractelement <4 x half> %v, i64 %idx
%ins = insertelement <4 x half> poison, half %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 87f9bfbd1aaff7..55249834ae72de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVFHMIN,ZVFHMINRV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVFHMIN,ZVFHMINRV64
define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v4i32_0:
@@ -673,3 +675,102 @@ define <8 x i64> @insertelt_c5_v8xi64_exact(<8 x i64> %vin, i64 %a) vscale_range
%v = insertelement <8 x i64> %vin, i64 %a, i32 5
ret <8 x i64> %v
}
+
+define <4 x bfloat> @insertelt_v4bf16_0(<4 x bfloat> %a, bfloat %y) {
+; CHECK-LABEL: insertelt_v4bf16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %b = insertelement <4 x bfloat> %a, bfloat %y, i32 0
+ ret <4 x bfloat> %b
+}
+
+define <4 x bfloat> @insertelt_v4bf16_3(<4 x bfloat> %a, bfloat %y) {
+; CHECK-LABEL: insertelt_v4bf16_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: ret
+ %b = insertelement <4 x bfloat> %a, bfloat %y, i32 3
+ ret <4 x bfloat> %b
+}
+
+define <4 x bfloat> @insertelt_v4bf16_idx(<4 x bfloat> %a, bfloat %y, i32 zeroext %idx) {
+; CHECK-LABEL: insert...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/114927