[llvm] [LegalizeVectorOps][RISCV] Use VP_FP_EXTEND/ROUND when promoting VP_FP* operations. (PR #122784)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 12:31:14 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
This preserves the original VL leading to more reuse of VL for vsetvli. The VLOptimizer can also clean up a lot of this, but I'm not sure if it gets all of it.
There are some regressions in here from propagating the mask too, but I'm not sure if that's a concern.
---
Patch is 1.50 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122784.diff
33 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+20-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll (+310-234)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll (+74-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll (+74-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll (+64-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll (+64-72)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll (+74-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll (+74-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll (+74-62)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll (+78-70)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll (+78-70)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll (+92-76)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll (+36-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll (+36-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll (+78-70)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll (+32-32)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll (+78-70)
- (modified) llvm/test/CodeGen/RISCV/rvv/floor-vp.ll (+310-234)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll (+462-432)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll (+462-432)
- (modified) llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll (+334-226)
- (modified) llvm/test/CodeGen/RISCV/rvv/rint-vp.ll (+310-232)
- (modified) llvm/test/CodeGen/RISCV/rvv/round-vp.ll (+310-234)
- (modified) llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll (+310-234)
- (modified) llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll (+310-234)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll (+397-346)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll (+383-334)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll (+4739-5205)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll (+194-122)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll (+194-122)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll (+158-138)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll (+101-99)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll (+383-334)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 89a00c5a4f0439..a6d1b1cb7b104e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -737,7 +737,17 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
.getVectorElementType()
.isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
- Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+ if (ISD::isVPOpcode(Node->getOpcode())) {
+ unsigned EVLIdx =
+ *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+ unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+ Operands[j] =
+ DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
+ Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+ } else {
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
+ }
else
Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
else
@@ -750,8 +760,15 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
- Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
- DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ if (ISD::isVPOpcode(Node->getOpcode())) {
+ unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
+ unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
+ Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
+ Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
+ } else {
+ Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
+ }
else
Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index a81f0ac982f569..1b9c78a20ec3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -17,23 +17,27 @@ declare <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat>, <vsc
define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v9, v0.t
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v11, v10, v0.t
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
-; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x bfloat> %v
@@ -42,12 +46,12 @@ define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vs
define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv1bf16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v8, v9
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
@@ -55,7 +59,7 @@ define <vscale x 1 x bfloat> @vp_ceil_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat>
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
@@ -67,23 +71,27 @@ declare <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat>, <vsc
define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfabs.v v8, v9, v0.t
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v11, v10, v0.t
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x bfloat> %v
@@ -92,12 +100,12 @@ define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vs
define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2bf16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v8, v9
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
@@ -105,7 +113,7 @@ define <vscale x 2 x bfloat> @vp_ceil_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat>
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
@@ -117,25 +125,27 @@ declare <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat>, <vsc
define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v10, v0.t
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v9, v12, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x bfloat> %v
@@ -144,12 +154,12 @@ define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vs
define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4bf16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
@@ -157,7 +167,7 @@ define <vscale x 4 x bfloat> @vp_ceil_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat>
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -169,25 +179,27 @@ declare <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat>, <vsc
define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v12, v0.t
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v10, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x bfloat> %v
@@ -196,12 +208,12 @@ define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vs
define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8bf16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v8, v12
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
@@ -209,7 +221,7 @@ define <vscale x 8 x bfloat> @vp_ceil_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat>
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
@@ -221,25 +233,27 @@ declare <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat>, <
define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v12, v0
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v12, v24, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
-; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x bfloat> %v
@@ -248,12 +262,12 @@ define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16(<vscale x 16 x bfloat> %va,
define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv16bf16_unmasked:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: lui a1, 307200
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: lui a0, 307200
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v16
-; CHECK-NEXT: fmv.w.x fa5, a1
+; CHECK-NEXT: fmv.w.x fa5, a0
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
@@ -261,7 +275,7 @@ define <vscale x 16 x bfloat> @vp_ceil_vv_nxv16bf16_unmasked(<vscale x 16 x bflo
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
@@ -279,59 +293,64 @@ define <vscale x 32 x bfloat> @vp_ceil_vv_nxv32bf16(<vscale x 32 x bfloat> %va,
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: lui a3, 307200
; CHECK-NEXT: slli a1, a2, 1
; CHECK-NEXT: srli a2, a2, 2
; CHECK-NEXT: fmv.w.x fa5, a3
; CHECK-NEXT: sub a3, a0, a1
-; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v12, v0, a2
+; CHECK-NEXT: vslidedown.vx v17, v0, a2
; CHECK-NEXT: sltu a2, a0, a3
+; CHECK-NEXT: vmv1r.v v18, v17
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v24, v0.t
+; CHECK-NEXT: vmv1r.v v0, v17
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; CHECK-NEXT: vfabs.v v8, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 3
-; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmv1r.v v0, v18
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
-; C...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/122784
More information about the llvm-commits
mailing list