[llvm] [RISCV] Support LLVM IR intrinsics for XAndesVBFHCvt (PR #145321)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 05:49:31 PDT 2025
https://github.com/tclin914 created https://github.com/llvm/llvm-project/pull/145321
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt.
The document for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt
Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified. The corresponding LLVM IR intrinsic testcases will be added in a follow-up patch.
The clang part will be added in a later patch.
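For reference, here is a minimal LLVM IR sketch of how the new unmasked intrinsics are called, mirroring the testcases in this patch (iXLen instantiated as i64; the function names @widen_example and @narrow_example are only illustrative):

  ; Widening convert bf16 -> f32: operands are passthru, source, vl.
  define <vscale x 1 x float> @widen_example(<vscale x 1 x bfloat> %v, i64 %vl) {
    %r = call <vscale x 1 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv1f32.nxv1bf16(
              <vscale x 1 x float> undef, <vscale x 1 x bfloat> %v, i64 %vl)
    ret <vscale x 1 x float> %r
  }

  ; Narrowing convert f32 -> bf16: operands are passthru, source, rounding mode, vl;
  ; rounding mode 7 selects the dynamic frm, as in the testcases below.
  define <vscale x 1 x bfloat> @narrow_example(<vscale x 1 x float> %v, i64 %vl) {
    %r = call <vscale x 1 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv1bf16.nxv1f32(
              <vscale x 1 x bfloat> undef, <vscale x 1 x float> %v, i64 7, i64 %vl)
    ret <vscale x 1 x bfloat> %r
  }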
>From b08acc13ef041197cfe4ff657668ac6b7990f436 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Thu, 19 Jun 2025 17:15:37 +0800
Subject: [PATCH] [RISCV] Support LLVM IR intrinsics for XAndesVBFHCvt
This patch adds LLVM IR intrinsic support for XAndesVBFHCvt.
The document for the intrinsics can be found at:
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-widening-convert-intrinsicsxandesvbfhcvt
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#vector-narrowing-convert-intrinsicsxandesvbfhcvt
Vector bf16 load/store intrinsics are also enabled when +xandesvbfhcvt is specified.
The corresponding LLVM IR intrinsic testcases will be added in a follow-up patch.
The clang part will be added in a later patch.
Co-authored-by: Tony Chuan-Yue Yuan <yuan593 at andestech.com>
---
llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td | 4 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +-
llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td | 48 +++++++++++
.../RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll | 85 +++++++++++++++++++
.../RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll | 85 +++++++++++++++++++
5 files changed, 224 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
index 270066f815d8b..92a07e9a6a5d0 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td
@@ -11,6 +11,10 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "riscv" in {
+ // Andes Vector BFloat16 Conversion Extension
+ def int_riscv_nds_vfwcvt_s_bf16 : RISCVConversionUnMasked;
+ def int_riscv_nds_vfncvt_bf16_s : RISCVConversionUnMaskedRoundingMode;
+
// Andes Vector Packed FP16 Extension
defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e568052079ce..8f428ff6a54f3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -215,7 +215,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasVInstructionsBF16Minimal())
+ if (Subtarget.hasVInstructionsBF16Minimal() ||
+ Subtarget.hasVendorXAndesVBFHCvt())
for (MVT VT : BF16VecVTs)
addRegClassForRVV(VT);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
index 4b8d40d1429aa..0e2fcd2336151 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td
@@ -384,6 +384,46 @@ class NDSRVInstVBFHCvt<bits<7> funct7, bits<5> vs1, string opcodestr>
// Multiclass
//===----------------------------------------------------------------------===//
+multiclass VPseudoVWCVT_S_BF16 {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ let VLMul = m.value, SEW=16 in
+ def "_" # m.MX : VPseudoUnaryNoMask<m.wvrclass, m.vrclass, constraint>,
+ SchedUnary<"WriteVFWCvtIToFV", "ReadVFWCvtIToFV", m.MX, 16,
+ forcePassthruRead=true>;
+ }
+}
+
+multiclass VPseudoVNCVT_BF16_S {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ let VLMul = m.value, SEW=16 in
+ def "_" # m.MX : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.wvrclass, constraint>,
+ SchedUnary<"WriteVFNCvtFToFV", "ReadVFNCvtFToFV", m.MX, 16,
+ forcePassthruRead=true>;
+ }
+}
+
+multiclass VPatConversionS_BF16<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVendorXAndesVBFHCvt] in
+ def : VPatUnaryNoMask<intrinsic, instruction, "BF16", fwti.Vector, fvti.Vector,
+ fvti.Log2SEW, fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ }
+}
+
+multiclass VPatConversionBF16_S<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVendorXAndesVBFHCvt] in
+ def : VPatUnaryNoMaskRoundingMode<intrinsic, instruction, "S", fvti.Vector, fwti.Vector,
+ fvti.Log2SEW, fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
let fprclass = !cast<RegisterClass>("FPR32") in
def SCALAR_F16_FPR32 : FPR_Info<16>;
@@ -547,6 +587,14 @@ def : Sh2AddPat<NDS_LEA_W_ZE>;
def : Sh3AddPat<NDS_LEA_D_ZE>;
} // Predicates = [HasVendorXAndesPerf, IsRV64]
+let Predicates = [HasVendorXAndesVBFHCvt] in {
+defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16;
+defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S;
+} // Predicates = [HasVendorXAndesVBFHCvt]
+
+defm : VPatConversionS_BF16<"int_riscv_nds_vfwcvt_s_bf16", "PseudoNDS_VFWCVT_S">;
+defm : VPatConversionBF16_S<"int_riscv_nds_vfncvt_bf16_s", "PseudoNDS_VFNCVT_BF16">;
+
let Predicates = [HasVendorXAndesVPackFPH],
mayRaiseFPException = true in {
defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM;
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
new file mode 100644
index 0000000000000..3cde575db8f05
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfncvt-bf16-s.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> undef,
+ <vscale x 1 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> undef,
+ <vscale x 2 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> undef,
+ <vscale x 4 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> undef,
+ <vscale x 8 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_bf16.s_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: nds.vfncvt.bf16.s v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.nds.vfncvt.bf16.s.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> undef,
+ <vscale x 16 x float> %0,
+ iXLen 7, iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
new file mode 100644
index 0000000000000..d44295d2480c1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/xandesvbfhcvt-vfwcvt-s-bf16.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+xandesvbfhcvt \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+define <vscale x 1 x float> @intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv1f32.nxv1bf16(
+ <vscale x 1 x float> undef,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv2f32.nxv2bf16(
+ <vscale x 2 x float> undef,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv4f32.nxv4bf16(
+ <vscale x 4 x float> undef,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv8f32.nxv8bf16(
+ <vscale x 8 x float> undef,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_s.bf16_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: nds.vfwcvt.s.bf16 v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.nds.vfwcvt.s.bf16.nxv16f32.nxv16bf16(
+ <vscale x 16 x float> undef,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x float> %a
+}