[llvm] [RISCV] Implement isHighLatencyDef() (PR #127476)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 04:04:54 PST 2025
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/127476
It returns true for div/rem/sqrt/... operations.
This provides an alternative latency hint for subtargets that do not have a generic scheduling model.
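For context, the SelectionDAG list scheduler consults this hook when a subtarget has no itinerary or scheduling model. The sketch below paraphrases that fallback (roughly ScheduleDAGSDNodes::computeLatency; shown only for illustration, it is not part of this patch and the exact code may differ between LLVM versions):

  // With no itinerary/sched model, nodes flagged by isHighLatencyDef()
  // get a fixed "high" latency (-sched-high-latency-cycles, default 10);
  // everything else gets unit latency.
  if (!InstrItins || InstrItins->isEmpty()) {
    if (N && N->isMachineOpcode() &&
        TII->isHighLatencyDef(N->getMachineOpcode()))
      SU->Latency = HighLatencyCycles;
    else
      SU->Latency = 1;
    return;
  }

So marking div/rem/sqrt opcodes here makes the pre-RA scheduler treat them as long-latency defs even when no scheduling model is available.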
From 3b4d653efcfebacb75a8aca55454b119793a8532 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Mon, 17 Feb 2025 20:03:32 +0800
Subject: [PATCH] [RISCV] Implement isHighLatencyDef()
It returns true for div/rem/sqrt/... operations.
This provides an alternative latency hint for subtargets that do not have a generic scheduling model.
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 46 ++++++++++
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 +
.../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll | 22 ++---
.../RISCV/rvv/vfdiv-constrained-sdnode.ll | 88 ++++++++++++-------
llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll | 76 +++++-----------
5 files changed, 135 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1ec299e3c8cc0..74235488ca53f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -3679,6 +3679,52 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
+bool RISCVInstrInfo::isHighLatencyDef(int Opc) const {
+ switch (Opc) {
+ default:
+ return false;
+ // Integer div/rem.
+ case RISCV::DIV:
+ case RISCV::DIVW:
+ case RISCV::DIVU:
+ case RISCV::DIVUW:
+ case RISCV::REM:
+ case RISCV::REMW:
+ case RISCV::REMU:
+ case RISCV::REMUW:
+ // Floating-point div/sqrt.
+ case RISCV::FDIV_H:
+ case RISCV::FDIV_S:
+ case RISCV::FDIV_D:
+ case RISCV::FDIV_H_INX:
+ case RISCV::FDIV_S_INX:
+ case RISCV::FDIV_D_INX:
+ case RISCV::FDIV_D_IN32X:
+ case RISCV::FSQRT_H:
+ case RISCV::FSQRT_S:
+ case RISCV::FSQRT_D:
+ case RISCV::FSQRT_H_INX:
+ case RISCV::FSQRT_S_INX:
+ case RISCV::FSQRT_D_INX:
+ case RISCV::FSQRT_D_IN32X:
+ // Vector integer div/rem.
+ case CASE_VFMA_OPCODE_VV(DIV):
+ case CASE_VFMA_OPCODE_VV(DIVU):
+ case CASE_VFMA_OPCODE_VV(REM):
+ case CASE_VFMA_OPCODE_VV(REMU):
+ // case CASE_VFMA_OPCODE_VX(DIV):
+ // case CASE_VFMA_OPCODE_VX(DIVU):
+ // case CASE_VFMA_OPCODE_VX(REM):
+ // case CASE_VFMA_OPCODE_VX(REMU):
+ // Vector floating-point div/sqrt.
+ case CASE_VFMA_OPCODE_VV(FDIV):
+ // case CASE_VFMA_OPCODE_VF(FRDIV):
+ // case CASE_VFMA_OPCODE_VV(FSQRT):
+ // case CASE_VFMA_OPCODE_VV(FRSQRT7):
+ return true;
+ }
+}
+
#undef CASE_RVV_OPCODE_UNMASK_LMUL
#undef CASE_RVV_OPCODE_MASK_LMUL
#undef CASE_RVV_OPCODE_LMUL
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index ec628620d2982..afbc8df50b452 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -300,6 +300,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
+ bool isHighLatencyDef(int Opc) const override;
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
index eb7be14abe431..0d1d75c1b2a75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -894,18 +894,18 @@ define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
; CHECK-LABEL: vwmul_v2i16_multiuse:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vle8.v v9, (a1)
-; CHECK-NEXT: vle8.v v10, (a2)
-; CHECK-NEXT: vle8.v v11, (a3)
-; CHECK-NEXT: vsext.vf2 v12, v8
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle8.v v9, (a2)
+; CHECK-NEXT: vsext.vf2 v10, v8
; CHECK-NEXT: vsext.vf2 v8, v9
-; CHECK-NEXT: vsext.vf2 v9, v10
-; CHECK-NEXT: vsext.vf2 v10, v11
-; CHECK-NEXT: vmul.vv v11, v12, v10
-; CHECK-NEXT: vmul.vv v10, v8, v10
-; CHECK-NEXT: vdivu.vv v8, v8, v9
-; CHECK-NEXT: vor.vv v9, v11, v10
+; CHECK-NEXT: vdivu.vv v8, v10, v8
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v11, (a3)
+; CHECK-NEXT: vsext.vf2 v12, v9
+; CHECK-NEXT: vsext.vf2 v9, v11
+; CHECK-NEXT: vmul.vv v11, v12, v9
+; CHECK-NEXT: vmul.vv v9, v10, v9
+; CHECK-NEXT: vor.vv v9, v11, v9
; CHECK-NEXT: vor.vv v8, v9, v8
; CHECK-NEXT: ret
%a = load <2 x i8>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
index 07750623dd44b..217a02d08dead 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll
@@ -221,16 +221,16 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v0, v0, v8
+; CHECK-NEXT: vfdiv.vv v16, v0, v16
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vfdiv.vv v24, v0, v24
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
@@ -249,32 +249,42 @@ define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.v.x v16, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v0, v8, v0
+; CHECK-NEXT: vfdiv.vv v24, v16, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vfdiv.vv v16, v0, v8
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
@@ -573,16 +583,16 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v0, v0, v8
+; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfdiv.vv v24, v0, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -607,32 +617,42 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v8, a0
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v0, v8, v0
+; ZVFHMIN-NEXT: vfdiv.vv v24, v16, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16
+; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
index e671ba850415b..9aba6455f0fac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
@@ -200,16 +200,16 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v0, v0, v8
+; CHECK-NEXT: vfdiv.vv v16, v0, v16
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v16, v24
+; CHECK-NEXT: vfdiv.vv v24, v0, v24
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
@@ -224,39 +224,23 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
define <vscale x 32 x bfloat> @vfdiv_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
; CHECK-LABEL: vfdiv_vf_nxv32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v0, v8, v0
+; CHECK-NEXT: vfdiv.vv v16, v16, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfdiv.vv v16, v24, v16
+; CHECK-NEXT: vfdiv.vv v24, v24, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
@@ -528,16 +512,16 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v0, v0, v8
+; ZVFHMIN-NEXT: vfdiv.vv v16, v0, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24
+; ZVFHMIN-NEXT: vfdiv.vv v24, v0, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -558,39 +542,23 @@ define <vscale x 32 x half> @vfdiv_vf_nxv32f16(<vscale x 32 x half> %va, half %b
;
; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16:
; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: addi sp, sp, -16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a0, fa0
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: addi a1, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v8, a0
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v0, v8, v0
+; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16
+; ZVFHMIN-NEXT: vfdiv.vv v24, v24, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add sp, sp, a0
-; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
-; ZVFHMIN-NEXT: addi sp, sp, 16
-; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer