[llvm] [RISCV][llvm] Support fminimum, fmaximum, fminnum, fmaxnum, fminimumnum, fmaximumnum codegen for zvfbfa (PR #171794)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 02:17:06 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Brandon Wu (4vtomat)
Changes:
This patch adds support for both scalable and fixed-length vectors.
It also enables fsetcc pattern matching for zvfbfa so that fminimum
and fmaximum lower correctly.
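For orientation, a minimal illustrative example (not taken from the patch itself): under +experimental-zvfbfa, an intrinsic call like the one below can now be selected to the bf16 alternate-format (e16alt) vector min instruction directly, where it was previously promoted to f32. The function name is hypothetical; the intrinsic overload follows the naming used in the test files in this patch.

```llvm
; Hypothetical example: with -mattr=+experimental-zvfbfa this should
; select a native bf16 vfmin.vv under an e16alt vtype, instead of
; widening both operands to f32 and narrowing the result back.
declare <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat>, <2 x bfloat>)

define <2 x bfloat> @example_fminnum(<2 x bfloat> %a, <2 x bfloat> %b) {
  %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
  ret <2 x bfloat> %v
}
```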
---
Patch is 452.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171794.diff
15 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+9-9)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td (+6-3)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td (+16-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll (+346-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll (+1512)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll (+346-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll (+1512)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll (+588-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll (+588-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll (+547-145)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll (+641-177)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll (+547-145)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll (+641-177)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll (+603-167)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll (+603-167)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2c0a02ae396c7..9cd6817c1b7a7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -91,8 +91,10 @@ static cl::opt<bool>
static const unsigned ZvfbfaVPOps[] = {
ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
static const unsigned ZvfbfaOps[] = {
- ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
- ISD::FADD, ISD::FSUB, ISD::FMUL};
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
+ ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMINNUM,
+ ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUM,
+ ISD::FMAXIMUM};
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
@@ -1087,11 +1089,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECREDUCE_FMAXIMUM};
// TODO: Make more of these ops legal.
- static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM,
- ISD::FMAXNUM,
- ISD::FMINIMUMNUM,
- ISD::FMAXIMUMNUM,
- ISD::FDIV,
+ static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV,
ISD::FMA,
ISD::FSQRT,
ISD::FCEIL,
@@ -1103,8 +1101,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FNEARBYINT,
ISD::IS_FPCLASS,
ISD::SETCC,
- ISD::FMAXIMUM,
- ISD::FMINIMUM,
ISD::STRICT_FADD,
ISD::STRICT_FSUB,
ISD::STRICT_FMUL,
@@ -1297,6 +1293,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE,
ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS},
VT, Custom);
+ setOperationAction(
+ {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
+ Legal);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 14ad7ca0eb35a..b3cc33d31761d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -201,12 +201,15 @@ class VPatBinarySDNode_VF_RM<SDPatternOperator vop,
multiclass VPatBinaryFPSDNode_VV_VF<SDPatternOperator vop, string instruction_name,
bit isSEWAware = 0> {
- foreach vti = AllFloatVectors in {
+ foreach vti = AllFloatAndBF16Vectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : VPatBinarySDNode_VV<vop, instruction_name,
+ def : VPatBinarySDNode_VV<vop, instruction_name #
+ !if(!eq(vti.Scalar, bf16), "_ALT", ""),
vti.Vector, vti.Vector, vti.Log2SEW,
vti.LMul, vti.AVL, vti.RegClass, isSEWAware>;
- def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ def : VPatBinarySDNode_VF<vop, instruction_name#
+ !if(!eq(vti.Scalar, bf16), "_ALT", "")#
+ "_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Scalar,
vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
vti.ScalarRegClass, isSEWAware>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9273ce094eb0a..4c41667560a98 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1043,13 +1043,16 @@ class VPatBinaryVL_VF_RM<SDPatternOperator vop,
multiclass VPatBinaryFPVL_VV_VF<SDPatternOperator vop, string instruction_name,
bit isSEWAware = 0> {
- foreach vti = AllFloatVectors in {
+ foreach vti = AllFloatAndBF16Vectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : VPatBinaryVL_V<vop, instruction_name, "VV",
+ def : VPatBinaryVL_V<vop, instruction_name#
+ !if(!eq(vti.Scalar, bf16), "_ALT", ""), "VV",
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
vti.RegClass, isSEWAware>;
- def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ def : VPatBinaryVL_VF<vop, instruction_name#
+ !if(!eq(vti.Scalar, bf16), "_ALT", "")#"_V"#
+ vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
vti.ScalarRegClass, isSEWAware>;
@@ -1199,7 +1202,7 @@ multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_nam
multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
string inst_name,
string swapped_op_inst_name> {
- foreach fvti = AllFloatVectors in {
+ foreach fvti = AllFloatAndBF16Vectors in {
let Predicates = GetVTypePredicates<fvti>.Predicates in {
def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1),
fvti.RegClass:$rs2,
@@ -1207,7 +1210,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
VR:$passthru,
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(inst_name#
+ !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+ "_VV_"#fvti.LMul.MX#"_MASK")
VR:$passthru, fvti.RegClass:$rs1,
fvti.RegClass:$rs2, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MU)>;
@@ -1217,7 +1222,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
VR:$passthru,
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(inst_name#
+ !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+ "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
VR:$passthru, fvti.RegClass:$rs1,
fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MU)>;
@@ -1227,7 +1234,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
VR:$passthru,
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(swapped_op_inst_name#
+ !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+ "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
VR:$passthru, fvti.RegClass:$rs1,
fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MU)>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 6ee2e204bcfe3..c3d6b1fac50dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -1,12 +1,208 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmax.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmax.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT: vfmax.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT: vmfeq.vv v0, v16, v16
+; ZVFH-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT: vfmax.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.maximum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %v
+}
define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv:
@@ -35,6 +231,23 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -66,6 +279,23 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -97,6 +327,23 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetv...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/171794