[llvm] a3cd269 - [RISCV] Remove {s,u}int_to_fp custom op action for f16/bf16 (#111471)

Thu Oct 10 06:40:28 PDT 2024

Author: Luke Lau
Date: 2024-10-10T14:40:24+01:00
New Revision: a3cd269fbebecb6971e216a9c29ad8933ad7b0fc

URL: https://github.com/llvm/llvm-project/commit/a3cd269fbebecb6971e216a9c29ad8933ad7b0fc
DIFF: https://github.com/llvm/llvm-project/commit/a3cd269fbebecb6971e216a9c29ad8933ad7b0fc.diff

LOG: [RISCV] Remove {s,u}int_to_fp custom op action for f16/bf16 (#111471)

It turns out that {s,u}int_to_fp nodes get their operation action from
their operand's type, not the result type, so we don't need to set it
for f16 or bf16. vp_{s,u}int_to_fp uses the result type, though, so we
need to keep the custom action for those nodes.

This also means that we can lower int_to_fp for fixed length bf16
vectors already, so this adds tests for that.

The cost model test changes are due to BasicTTIImpl's getCastInstrCost
not taking into account that int_to_fp needs its legal type swapped.
This can be fixed in a later patch, but it's worth noting that the
affected types in the tests currently crash when lowered anyway (due to
them needing to be split at LMUL > 8).

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/Analysis/CostModel/RISCV/cast-half.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 01fa418e4dbdf4..230ccd8209e1f2 100644

--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1071,9 +1071,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
                          Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
-      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::VP_SINT_TO_FP,
-                          ISD::VP_UINT_TO_FP},
-                         VT, Custom);
+      setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                           ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
@@ -1343,9 +1341,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           setOperationAction(
               {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
               Custom);
-          setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,
-                              ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
-                             VT, Custom);
+          setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
+                             Custom);
           setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
           if (Subtarget.hasStdExtZfhmin()) {
             setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

diff  --git a/llvm/test/Analysis/CostModel/RISCV/cast-half.ll b/llvm/test/Analysis/CostModel/RISCV/cast-half.ll
index 84b5486eb2de1c..244c42cc94ba03 100644
--- a/llvm/test/Analysis/CostModel/RISCV/cast-half.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/cast-half.ll
@@ -842,7 +842,7 @@ define void @sitofp() {
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
-; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
@@ -988,7 +988,7 @@ define void @sitofp() {
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
-; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
@@ -1208,7 +1208,7 @@ define void @uitofp() {
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
-; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
+; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
 ; RV32ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
@@ -1354,7 +1354,7 @@ define void @uitofp() {
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 83 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
-; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
+; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
 ; RV64ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index bfcc7017178e31..a4a491989c7f02 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
 
 define void @fp2si_v2f32_v2i32(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2si_v2f32_v2i32:
@@ -432,6 +432,64 @@ define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
   ret void
 }
 
+define void @fp2si_v2bf16_v2i64(ptr %x, ptr %y) {
+; CHECK-LABEL: fp2si_v2bf16_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
+; CHECK-NEXT:    vse64.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <2 x bfloat>, ptr %x
+  %d = fptosi <2 x bfloat> %a to <2 x i64>
+  store <2 x i64> %d, ptr %y
+  ret void
+}
+
+define void @fp2ui_v2bf16_v2i64(ptr %x, ptr %y) {
+; CHECK-LABEL: fp2ui_v2bf16_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
+; CHECK-NEXT:    vse64.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <2 x bfloat>, ptr %x
+  %d = fptoui <2 x bfloat> %a to <2 x i64>
+  store <2 x i64> %d, ptr %y
+  ret void
+}
+
+define <2 x i1> @fp2si_v2bf16_v2i1(<2 x bfloat> %x) {
+; CHECK-LABEL: fp2si_v2bf16_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %z = fptosi <2 x bfloat> %x to <2 x i1>
+  ret <2 x i1> %z
+}
+
+define <2 x i1> @fp2ui_v2bf16_v2i1(<2 x bfloat> %x) {
+; CHECK-LABEL: fp2ui_v2bf16_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %z = fptoui <2 x bfloat> %x to <2 x i1>
+  ret <2 x i1> %z
+}
+
 define void @fp2si_v2f16_v2i64(ptr %x, ptr %y) {
 ; CHECK-LABEL: fp2si_v2f16_v2i64:
 ; CHECK:       # %bb.0:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 7333067e9205e0..9cdc9b81c9530a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVFH64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+zvfbfmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN64
 
 define void @si2fp_v2i32_v2f32(ptr %x, ptr %y) {
 ; CHECK-LABEL: si2fp_v2i32_v2f32:
@@ -418,6 +418,122 @@ define <8 x double> @ui2fp_v8i1_v8f64(<8 x i1> %x) {
   ret <8 x double> %z
 }
 
+define void @si2fp_v2i64_v2bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: si2fp_v2i64_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.x.w v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <2 x i64>, ptr %x
+  %d = sitofp <2 x i64> %a to <2 x bfloat>
+  store <2 x bfloat> %d, ptr %y
+  ret void
+}
+
+define void @ui2fp_v2i64_v2bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: ui2fp_v2i64_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <2 x i64>, ptr %x
+  %d = uitofp <2 x i64> %a to <2 x bfloat>
+  store <2 x bfloat> %d, ptr %y
+  ret void
+}
+
+define <2 x bfloat> @si2fp_v2i1_v2bf16(<2 x i1> %x) {
+; CHECK-LABEL: si2fp_v2i1_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    ret
+  %z = sitofp <2 x i1> %x to <2 x bfloat>
+  ret <2 x bfloat> %z
+}
+
+define <2 x bfloat> @ui2fp_v2i1_v2bf16(<2 x i1> %x) {
+; CHECK-LABEL: ui2fp_v2i1_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT:    ret
+  %z = uitofp <2 x i1> %x to <2 x bfloat>
+  ret <2 x bfloat> %z
+}
+
+define void @si2fp_v8i64_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: si2fp_v8i64_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.x.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <8 x i64>, ptr %x
+  %d = sitofp <8 x i64> %a to <8 x bfloat>
+  store <8 x bfloat> %d, ptr %y
+  ret void
+}
+
+define void @ui2fp_v8i64_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: ui2fp_v8i64_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT:    vse16.v v8, (a1)
+; CHECK-NEXT:    ret
+  %a = load <8 x i64>, ptr %x
+  %d = uitofp <8 x i64> %a to <8 x bfloat>
+  store <8 x bfloat> %d, ptr %y
+  ret void
+}
+
+define <8 x bfloat> @si2fp_v8i1_v8bf16(<8 x i1> %x) {
+; CHECK-LABEL: si2fp_v8i1_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    vfwcvt.f.x.v v10, v8
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    ret
+  %z = sitofp <8 x i1> %x to <8 x bfloat>
+  ret <8 x bfloat> %z
+}
+
+define <8 x bfloat> @ui2fp_v8i1_v8bf16(<8 x i1> %x) {
+; CHECK-LABEL: ui2fp_v8i1_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
+; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT:    ret
+  %z = uitofp <8 x i1> %x to <8 x bfloat>
+  ret <8 x bfloat> %z
+}
+
 define void @si2fp_v2i64_v2f16(ptr %x, ptr %y) {
 ; CHECK-LABEL: si2fp_v2i64_v2f16:
 ; CHECK:       # %bb.0: