[llvm] a484fa1 - [RISCV][VLOPT] Add floating-point multiply/divide instructions to isSupportedInstr
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 10:50:51 PST 2025
Author: Michael Maitland
Date: 2025-01-09T10:50:32-08:00
New Revision: a484fa1d0a974680d609d3f08ee6d57c9e3d21f8
URL: https://github.com/llvm/llvm-project/commit/a484fa1d0a974680d609d3f08ee6d57c9e3d21f8
DIFF: https://github.com/llvm/llvm-project/commit/a484fa1d0a974680d609d3f08ee6d57c9e3d21f8.diff
LOG: [RISCV][VLOPT] Add floating-point multiply/divide instructions to isSupportedInstr
Added:

Modified:
    llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
    llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Removed:
################################################################################
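In brief: the RISC-V VL optimizer shrinks an instruction's VL operand when every user of its result demands fewer elements, which lets adjacent instructions share a single vsetvli. This patch adds the single-width floating-point multiply/divide opcodes (vfmul, vfdiv, vfrdiv) to the pass's whitelist. A minimal LLVM IR sketch of the pattern, lifted from the vl-opt-instrs.ll additions below (@example is a placeholder name, and iXLen stands for the target's XLEN integer type as substituted by the test's RUN lines; illustrative only):

    ; The vfmul is emitted with VL = -1 (VLMAX), but its only user runs at %vl,
    ; so the pass may shrink the vfmul's VL to %vl and drop a vsetvli toggle.
    define <vscale x 4 x float> @example(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
      %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
      %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
      ret <vscale x 4 x float> %2
    }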
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 018556d27d7637..bf6a8068413ac8 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -998,6 +998,12 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VFWADD_WV:
case RISCV::VFWSUB_WF:
case RISCV::VFWSUB_WV:
+ // Vector Single-Width Floating-Point Multiply/Divide Instructions
+ case RISCV::VFMUL_VF:
+ case RISCV::VFMUL_VV:
+ case RISCV::VFDIV_VF:
+ case RISCV::VFDIV_VV:
+ case RISCV::VFRDIV_VF:
// Single-Width Floating-Point/Integer Type-Convert Instructions
case RISCV::VFCVT_XU_F_V:
case RISCV::VFCVT_X_F_V:
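The end-to-end effect, distilled from the vfmul_vv checks in vl-opt-instrs.ll below: once the opcode is whitelisted, the VLMAX vsetvli ahead of the multiply becomes unnecessary, and both instructions execute under the consumer's VL.

    # Before (NOVLOPT):
    vsetvli a1, zero, e32, m2, ta, ma
    vfmul.vv v8, v8, v10
    vsetvli zero, a0, e32, m2, ta, ma
    vfadd.vv v8, v8, v10

    # After (VLOPT):
    vsetvli zero, a0, e32, m2, ta, ma
    vfmul.vv v8, v8, v10
    vfadd.vv v8, v8, v10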
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index ce23dd0eac203b..b8710a518287a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -363,12 +363,11 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
@@ -499,12 +498,11 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
@@ -2892,13 +2890,12 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
@@ -3034,13 +3031,12 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
@@ -3176,13 +3172,12 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
@@ -3318,13 +3313,12 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
@@ -4993,12 +4987,11 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
@@ -5170,12 +5163,11 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 804a8a614a820b..bdacd17fe75c04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3193,3 +3193,103 @@ define <vscale x 4 x double> @vfwsub_wf(<vscale x 4 x double> %a, float %b, iXLe
%2 = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> %1, <vscale x 4 x double> %1, iXLen 7, iXLen %vl)
ret <vscale x 4 x double> %2
}
+
+define <vscale x 4 x float> @vfmul_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfmul_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmul_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vfmul.vv v8, v8, v10
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmul_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfmul_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfmul.vf v10, v8, fa0
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmul_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vfmul.vf v10, v8, fa0
+; VLOPT-NEXT: vfadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfdiv_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfdiv_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfdiv.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfdiv_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vfdiv.vv v8, v8, v10
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen 7, iXLen -1)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %b, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfdiv_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfdiv_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfdiv.vf v10, v8, fa0
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfdiv_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vfdiv.vf v10, v8, fa0
+; VLOPT-NEXT: vfadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfrdiv_vf(<vscale x 4 x float> %a, float %b, iXLen %vl) {
+; NOVLOPT-LABEL: vfrdiv_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfrdiv.vf v10, v8, fa0
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfrdiv_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vfrdiv.vf v10, v8, fa0
+; VLOPT-NEXT: vfadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, float %b, iXLen 7, iXLen -1)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}