[llvm] [SelectionDAG][RISCV] Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)) in getNode. (PR #144565)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 09:56:04 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
Author: Craig Topper (topperc)
Changes:
We already have similar vscale-related folds for shl and mul in getNode.
This is an alternative to the DAGCombine proposed in #144507.
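For context, a minimal sketch of how the fold changes DAG construction (the helper name `lowerVScaleSum` is hypothetical and not part of the patch): the code below builds two `ISD::VSCALE` nodes and adds them; with this change, `getNode` returns a single `ISD::VSCALE` whose multiplier is `C0 + C1` instead of an `ISD::ADD` of the two nodes.

```c++
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Hypothetical helper, for illustration only: build (vscale * C0) + (vscale * C1).
// Before this patch, getNode produced (add (vscale C0), (vscale C1)); with the
// fold it returns (vscale (C0 + C1)) directly.
static SDValue lowerVScaleSum(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                              const APInt &C0, const APInt &C1) {
  SDValue A = DAG.getVScale(DL, VT, C0); // vscale * C0
  SDValue B = DAG.getVScale(DL, VT, C1); // vscale * C1
  return DAG.getNode(ISD::ADD, DL, VT, A, B);
}
```

On RISC-V this is why several `add`-of-equal-operands sequences disappear in the updated tests: the combined multiplier is materialized once from vlenb (e.g. a single `srli`) instead of shifting vlenb and then re-adding the result to itself.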
---
Patch is 127.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144565.diff
14 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+6)
- (modified) llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll (-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll (+22-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll (+149-134)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll (+4-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll (+3-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll (+1-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll (+84-110)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll (+4-5)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+372-445)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll (+4-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll (+44-56)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..b50d8fe72d5cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7377,6 +7377,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
VT.getScalarType() == MVT::i1)
return getNode(ISD::XOR, DL, VT, N1, N2);
+ if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE &&
+ N2.getOpcode() == ISD::VSCALE) {
+ const APInt &C1 = N1->getConstantOperandAPInt(0);
+ const APInt &C2 = N2->getConstantOperandAPInt(0);
+ return getVScale(DL, VT, C1 + C2);
+ }
break;
case ISD::MUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 83637e4a71d45..0d288c0426a5e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -490,8 +490,6 @@ define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in)
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v13, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
@@ -545,8 +543,6 @@ define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v13, v10, a0
; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v12, v10, a0
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index ca9cec921b3cd..e6f3c56956f72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -80,10 +80,9 @@ define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a1, a1, a0
; CHECK-NEXT: add a0, a1, a0
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a0
; CHECK-NEXT: ret
%v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
ret <vscale x 4 x i8> %v
@@ -246,8 +245,7 @@ define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec,
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
@@ -282,8 +280,8 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vsc
; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
@@ -296,8 +294,9 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vsc
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a0
; CHECK-NEXT: ret
@@ -309,10 +308,10 @@ define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vsc
; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a1, a1, a0
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vx v8, v10, a1
; CHECK-NEXT: ret
@@ -363,8 +362,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_2(<vscale x 32 x half> %vec
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 2)
@@ -376,8 +374,7 @@ define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_26(<vscale x 32 x half> %ve
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec, i64 26)
@@ -397,8 +394,9 @@ define <vscale x 32 x half> @insert_nxv32f16_undef_nxv1f16_26(<vscale x 1 x half
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v8, a0
; CHECK-NEXT: ret
@@ -422,8 +420,8 @@ define <vscale x 32 x i1> @insert_nxv32i1_nxv8i1_8(<vscale x 32 x i1> %v, <vscal
; CHECK-LABEL: insert_nxv32i1_nxv8i1_8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v0, v8, a0
; CHECK-NEXT: ret
@@ -462,10 +460,11 @@ define <vscale x 4 x i1> @insert_nxv4i1_nxv1i1_2(<vscale x 4 x i1> %v, <vscale x
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vx v9, v8, a0
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
@@ -570,8 +569,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_2(<vscale x 32 x bfloat
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v8, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 2)
@@ -583,8 +581,7 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_nxv2bf16_26(<vscale x 32 x bfloa
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v16, a0
; CHECK-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.vector.insert.nxv2bf16.nxv32bf16(<vscale x 32 x bfloat> %vec, <vscale x 2 x bfloat> %subvec, i64 26)
@@ -604,8 +601,9 @@ define <vscale x 32 x bfloat> @insert_nxv32bf16_undef_nxv1bf16_26(<vscale x 1 x
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vslideup.vx v14, v8, a0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 28b27bb75f210..9972df97ad9f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1371,6 +1371,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
@@ -1378,9 +1380,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv8r.v v0, v16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: add a3, a3, a1
+; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
@@ -1406,6 +1407,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: mv t1, t0
; CHECK-NEXT: slli t0, t0, 2
+; CHECK-NEXT: add t1, t1, t0
+; CHECK-NEXT: slli t0, t0, 1
; CHECK-NEXT: add t0, t0, t1
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: addi t0, t0, 16
@@ -1413,9 +1416,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vslidedown.vx v16, v8, a1
; CHECK-NEXT: vl8re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add t0, t0, a0
+; CHECK-NEXT: mv t0, a0
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add a0, a0, t0
; CHECK-NEXT: add a0, sp, a0
@@ -1445,10 +1447,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v5, v8, v16, v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a6, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
@@ -1457,85 +1455,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v6, v24, v16, v0.t
-; CHECK-NEXT: add a0, a3, a3
+; CHECK-NEXT: vmfeq.vv v7, v24, v16, v0.t
; CHECK-NEXT: bltu a2, a5, .LBB85_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a5
; CHECK-NEXT: .LBB85_4:
-; CHECK-NEXT: sub a5, a2, a4
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
-; CHECK-NEXT: add a6, a6, a7
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload
-; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v7, a3
-; CHECK-NEXT: sltu a6, a2, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: add a7, a7, a6
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, a6, a7
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: sub a0, a2, a4
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 1
-; CHECK-NEXT: add a6, a6, a5
+; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: slli a5, a5, 2
+; CHECK-NEXT: add a6, a6, a5
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: add a5, a5, a6
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: sltu a5, a2, a0
+; CHECK-NEXT: addi a5, a5, -1
+; CHECK-NEXT: and a0, a5, a0
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: mv a6, a5
+; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a5, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v4, v16, v24, v0.t
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v6, v5, a3
+; CHECK-NEXT: vmfeq.vv v10, v16, v24, v0.t
+; CHECK-NEXT: vmv1r.v v9, v7
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v5, a3
; CHECK-NEXT: bltu a2, a4, .LBB85_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a4
; CHECK-NEXT: .LBB85_6:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: add a5, a5, a4
-; CHECK-NEXT: slli a4, a4, 3
-; CHECK-NEXT: add a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a4, a0
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: slli a2, a2, 1
-; CHECK-NEXT: add a4, a4, a2
-; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: add a2, a2, a4
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a2, a2, a0
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, a0, a2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v4, a3
-; CHECK-NEXT: add a0, a1, a1
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v6, a1
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a3
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: mv a1, a0
@@ -3546,8 +3554,7 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v16, v24, v8, v0.t
-; ZVFH-NEXT: add a0, a1, a1
-; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; ZVFH-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; ZVFH-NEXT: vslideup.vx v16, v6, a1
; ZVFH-NEXT: vmv.v.v v0, v16
; ZVFH-NEXT: csrr a0, vlenb
@@ -3576,6 +3583,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
@@ -3583,9 +3592,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; ZVFHMIN-NEXT: vmv8r.v v0, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: add a3, a3, a1
+; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -3611,6 +3619,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: mv t1, t0
; ZVFHMIN-NEXT: slli t0, t0, 2
+; ZVFHMIN-NEXT: add t1, t1, t0
+; ZVFHMIN-NEXT: slli t0, t0, 1
; ZVFHMIN-NEXT: add t0, t0, t1
; ZVFHMIN-NEXT: add t0, sp, t0
; ZVFHMIN-NEXT: addi t0, t0, 16
@@ -3618,9 +3628,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vslidedown.vx v16, v8, a1
; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add t0, t0, a0
+; ZVFHMIN-NEXT: mv t0, a0
; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -3650,10 +3659,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v5, v8, v16, v0.t
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a6, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
@@ -3662,85 +3667,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/144565
More information about the llvm-commits mailing list