[llvm] [RISCV][VLOPT] Add vfsqrt/vfrsqrt7 instructions to isSupportedInstr (PR #127462)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 18:55:51 PST 2025
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/127462
>From c110e5bb93d1211aad8985af0547ba47d168e879 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Mon, 17 Feb 2025 14:44:51 +0800
Subject: [PATCH 1/3] [RISCV][VLOPT] Add fsqrt instruction to isSupportedInstr
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 5 ++++-
.../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 6 ++----
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 20 +++++++++++++++++++
3 files changed, 26 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 1ba7f0b522a2b..0ed15afc7533d 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -290,7 +290,6 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
return MILog2SEW;
return 6;
}
-
// Vector Integer Arithmetic Instructions
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:
@@ -1141,6 +1140,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VFNCVT_F_F_W:
case RISCV::VFNCVT_ROD_F_F_W:
case RISCV::VFNCVTBF16_F_F_W:
+ // Vector Floating-Point Square-Root Instruction
+ case RISCV::VFSQRT_V:
+ // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+ case RISCV::VFRSQRT7_V:
return true;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 585a331e55094..bef29dfecef4c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1318,11 +1318,10 @@ define void @sqrt_v6bf16(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfsqrt.v v8, v10
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
@@ -1371,11 +1370,10 @@ define void @sqrt_v6f16(ptr %x) {
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfsqrt.v v8, v10
-; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 0475a988e9851..3729441db8a71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -141,6 +141,26 @@ body: |
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
...
---
+name: vfsqr
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vfsqr
+ ; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+ ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
+ %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
+ early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
+...
+---
+name: vfsqr_nofpexcept
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vfsqr_nofpexcept
+ ; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
+ ; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
+ %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
+ early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
+...
+---
name: vwadd_tied_vs1
body: |
bb.0:
>From 33b7481e6f5aab4e99c9632ee819d444b1dd300d Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Tue, 18 Feb 2025 17:27:40 +0800
Subject: [PATCH 2/3] address the comments and add the tests
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 8 +--
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 68 ++++++++++++++++++--
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 28 ++++++--
3 files changed, 92 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0ed15afc7533d..63b0755742fa0 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1091,6 +1091,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VFWNMSAC_VF:
case RISCV::VFWMACCBF16_VV:
case RISCV::VFWMACCBF16_VF:
+ // Vector Floating-Point Square-Root Instruction
+ case RISCV::VFSQRT_V:
+ // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+ case RISCV::VFRSQRT7_V:
// Vector Floating-Point MIN/MAX Instructions
case RISCV::VFMIN_VF:
case RISCV::VFMIN_VV:
@@ -1140,10 +1144,6 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VFNCVT_F_F_W:
case RISCV::VFNCVT_ROD_F_F_W:
case RISCV::VFNCVTBF16_F_F_W:
- // Vector Floating-Point Square-Root Instruction
- case RISCV::VFSQRT_V:
- // Vector Floating-Point Reciprocal Square-Root Estimate Instruction
- case RISCV::VFRSQRT7_V:
return true;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index c6ee9e34dc207..fbed88d9e9904 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
@@ -5069,3 +5069,63 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %d, iXLen 7, iXLen %vl)
ret <vscale x 4 x float> %2
}
+
+define <vscale x 4 x half> @vfsqrt(<vscale x 4 x half> %a) {
+; NOVLOPT-LABEL: vfsqrt:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8
+; NOVLOPT-NEXT: fsrmi a0, 0
+; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfsqrt.v v10, v10
+; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10
+; NOVLOPT-NEXT: fsrm a0
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfsqrt:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; VLOPT-NEXT: vfwcvt.f.f.v v10, v8
+; VLOPT-NEXT: fsrmi a0, 0
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfsqrt.v v10, v10
+; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; VLOPT-NEXT: vfncvt.f.f.w v8, v10
+; VLOPT-NEXT: fsrm a0
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 0, iXLen 7)
+ %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison, <vscale x 4 x float> %2, iXLen 0, iXLen 6)
+ ret <vscale x 4 x half> %3
+}
+
+define <vscale x 4 x half> @vfrsqrt7(<vscale x 4 x half> %a) {
+; NOVLOPT-LABEL: vfrsqrt7:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8
+; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfrsqrt7.v v10, v10
+; NOVLOPT-NEXT: fsrmi a0, 0
+; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10
+; NOVLOPT-NEXT: fsrm a0
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfrsqrt7:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; VLOPT-NEXT: vfwcvt.f.f.v v10, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfrsqrt7.v v10, v10
+; VLOPT-NEXT: fsrmi a0, 0
+; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; VLOPT-NEXT: vfncvt.f.f.w v8, v10
+; VLOPT-NEXT: fsrm a0
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 7)
+ %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison,<vscale x 4 x float> %2, iXLen 0, iXLen 6)
+ ret <vscale x 4 x half> %3
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 3729441db8a71..7ee7a5a102623 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -141,26 +141,46 @@ body: |
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
...
---
-name: vfsqr
+name: vfsqrt
body: |
bb.0:
- ; CHECK-LABEL: name: vfsqr
+ ; CHECK-LABEL: name: vfsqrt
; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
%x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
...
---
-name: vfsqr_nofpexcept
+name: vfsqrt_nofpexcept
body: |
bb.0:
- ; CHECK-LABEL: name: vfsqr_nofpexcept
+ ; CHECK-LABEL: name: vfsqrt_nofpexcept
; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
%x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
...
---
+name: vfrsqrt7
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vfrsqrt7
+ ; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
+ %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
+ %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
+...
+---
+name: vfrsqrt7_nofpexcept
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vfrsqrt7_nofpexcept
+ ; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
+ %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
+ %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
+...
+---
name: vwadd_tied_vs1
body: |
bb.0:
>From 5a5811fd3a4ec8278f1c6a595063800e0f92a3dc Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Wed, 19 Feb 2025 10:55:13 +0800
Subject: [PATCH 3/3] address the comments
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 1 +
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 68 ++++++++------------
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 18 +++---
3 files changed, 38 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 63b0755742fa0..1537cb1e9125e 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -290,6 +290,7 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
return MILog2SEW;
return 6;
}
+
// Vector Integer Arithmetic Instructions
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index fbed88d9e9904..5cd9b77af82cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma,+zvfh,+zvfbfmin -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer=false -verify-machineinstrs | FileCheck %s --check-prefixes=NOVLOPT
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvbb,+zvfbfwma -riscv-enable-vl-optimizer -verify-machineinstrs | FileCheck %s --check-prefixes=VLOPT
; The purpose of this file is to check the behavior of specific instructions as it relates to the VL optimizer
@@ -5070,62 +5070,50 @@ define <vscale x 4 x float> @vfwmaccbf16_vf(<vscale x 4 x float> %a, bfloat %b,
ret <vscale x 4 x float> %2
}
-define <vscale x 4 x half> @vfsqrt(<vscale x 4 x half> %a) {
+define <vscale x 4 x double> @vfsqrt(<vscale x 4 x float> %a) {
; NOVLOPT-LABEL: vfsqrt:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8
; NOVLOPT-NEXT: fsrmi a0, 0
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfsqrt.v v10, v10
-; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10
+; NOVLOPT-NEXT: vfsqrt.v v10, v8
; NOVLOPT-NEXT: fsrm a0
+; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10
+; NOVLOPT-NEXT: vmv4r.v v8, v12
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfsqrt:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; VLOPT-NEXT: vfwcvt.f.f.v v10, v8
; VLOPT-NEXT: fsrmi a0, 0
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vfsqrt.v v10, v10
-; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; VLOPT-NEXT: vfncvt.f.f.w v8, v10
+; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; VLOPT-NEXT: vfsqrt.v v10, v8
; VLOPT-NEXT: fsrm a0
+; VLOPT-NEXT: vfwmacc.vv v12, v8, v10
+; VLOPT-NEXT: vmv4r.v v8, v12
; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
- %2 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 0, iXLen 7)
- %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison, <vscale x 4 x float> %2, iXLen 0, iXLen 6)
- ret <vscale x 4 x half> %3
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 0, iXLen 7)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
+ ret <vscale x 4 x double> %2
}
-define <vscale x 4 x half> @vfrsqrt7(<vscale x 4 x half> %a) {
+define <vscale x 4 x double> @vfrsqrt7(<vscale x 4 x float> %a) {
; NOVLOPT-LABEL: vfrsqrt7:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT: vfwcvt.f.f.v v10, v8
; NOVLOPT-NEXT: vsetivli zero, 7, e32, m2, ta, ma
-; NOVLOPT-NEXT: vfrsqrt7.v v10, v10
-; NOVLOPT-NEXT: fsrmi a0, 0
-; NOVLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; NOVLOPT-NEXT: vfncvt.f.f.w v8, v10
-; NOVLOPT-NEXT: fsrm a0
+; NOVLOPT-NEXT: vfrsqrt7.v v10, v8
+; NOVLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfwmacc.vv v12, v8, v10
+; NOVLOPT-NEXT: vmv4r.v v8, v12
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vfrsqrt7:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; VLOPT-NEXT: vfwcvt.f.f.v v10, v8
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vfrsqrt7.v v10, v10
-; VLOPT-NEXT: fsrmi a0, 0
-; VLOPT-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; VLOPT-NEXT: vfncvt.f.f.w v8, v10
-; VLOPT-NEXT: fsrm a0
+; VLOPT-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; VLOPT-NEXT: vfrsqrt7.v v10, v8
+; VLOPT-NEXT: vfwmacc.vv v12, v8, v10
+; VLOPT-NEXT: vmv4r.v v8, v12
; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(<vscale x 4 x float> poison, <vscale x 4 x half> %a, iXLen 6)
- %2 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, iXLen 7)
- %3 = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(<vscale x 4 x half> poison,<vscale x 4 x float> %2, iXLen 0, iXLen 6)
- ret <vscale x 4 x half> %3
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %a, iXLen 7)
+ %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
+ ret <vscale x 4 x double> %2
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 7ee7a5a102623..cb43a89ea3bc6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -141,43 +141,43 @@ body: |
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 4 /* e16 */, 0
...
---
-name: vfsqrt
+name: vfsqrt_nofpexcept
body: |
bb.0:
- ; CHECK-LABEL: name: vfsqrt
+ ; CHECK-LABEL: name: vfsqrt_nofpexcept
; CHECK: %x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 6, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
%x:vrm2 = nofpexcept PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
...
---
-name: vfsqrt_nofpexcept
+name: vfsqrt_fpexcept
body: |
bb.0:
- ; CHECK-LABEL: name: vfsqrt_nofpexcept
+ ; CHECK-LABEL: name: vfsqrt_fpexcept
; CHECK: %x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5 /* e32 */, 3 /* ta, ma */, implicit $frm
; CHECK-NEXT: early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4 /* e16 */, 3 /* ta, ma */, implicit $frm
%x:vrm2 = PseudoVFSQRT_V_M2_E32 $noreg, $noreg, 7, 8, 5, 3, implicit $frm
early-clobber %y:vr = nofpexcept PseudoVFNCVTBF16_F_F_W_M1_E16 $noreg, %x, 7, 6, 4, 3, implicit $frm
...
---
-name: vfrsqrt7
+name: vfrsqrt7_nofpexcept
body: |
bb.0:
- ; CHECK-LABEL: name: vfrsqrt7
+ ; CHECK-LABEL: name: vfrsqrt7_nofpexcept
; CHECK: %x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
%x:vrm2 = nofpexcept PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
%y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
...
---
-name: vfrsqrt7_nofpexcept
+name: vfrsqrt7_fpexcept
body: |
bb.0:
- ; CHECK-LABEL: name: vfrsqrt7_nofpexcept
+ ; CHECK-LABEL: name: vfrsqrt7_fpexcept
; CHECK: %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5 /* e32 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0 /* tu, mu */
- %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
+ %x:vrm2 = PseudoVFRSQRT7_V_M2_E32 $noreg, $noreg, 7, 5, 0
%y:vrm2 = PseudoVADD_VV_M2 $noreg, %x, $noreg, 1, 5 /* e32 */, 0
...
---
More information about the llvm-commits
mailing list