[llvm] [SelectionDAG][RISCV] Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)) in getNode. (PR #144565)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 20:22:54 PDT 2025
================
@@ -3662,85 +3667,95 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v16, v0.t
-; ZVFHMIN-NEXT: add a0, a3, a3
+; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v16, v0.t
; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
; ZVFHMIN-NEXT: .LBB171_4:
-; ZVFHMIN-NEXT: sub a5, a2, a4
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
-; ZVFHMIN-NEXT: add a6, a6, a7
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl1r.v v7, (a6) # vscale x 8-byte Folded Reload
-; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
-; ZVFHMIN-NEXT: sltu a6, a2, a5
-; ZVFHMIN-NEXT: addi a6, a6, -1
-; ZVFHMIN-NEXT: and a5, a6, a5
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: add a7, a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 3
-; ZVFHMIN-NEXT: add a6, a6, a7
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a6) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, a5, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: sub a0, a2, a4
; ZVFHMIN-NEXT: csrr a5, vlenb
-; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 1
-; ZVFHMIN-NEXT: add a6, a6, a5
+; ZVFHMIN-NEXT: mv a6, a5
; ZVFHMIN-NEXT: slli a5, a5, 2
+; ZVFHMIN-NEXT: add a6, a6, a5
+; ZVFHMIN-NEXT: slli a5, a5, 1
+; ZVFHMIN-NEXT: add a5, a5, a6
+; ZVFHMIN-NEXT: add a5, sp, a5
+; ZVFHMIN-NEXT: addi a5, a5, 16
+; ZVFHMIN-NEXT: vl1r.v v8, (a5) # vscale x 8-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT: sltu a5, a2, a0
+; ZVFHMIN-NEXT: addi a5, a5, -1
+; ZVFHMIN-NEXT: and a0, a5, a0
+; ZVFHMIN-NEXT: csrr a5, vlenb
+; ZVFHMIN-NEXT: slli a5, a5, 1
+; ZVFHMIN-NEXT: mv a6, a5
+; ZVFHMIN-NEXT: slli a5, a5, 3
; ZVFHMIN-NEXT: add a5, a5, a6
; ZVFHMIN-NEXT: add a5, sp, a5
; ZVFHMIN-NEXT: addi a5, a5, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a5) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a5, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a5
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v6, v5, a3
+; ZVFHMIN-NEXT: vmfeq.vv v10, v16, v24, v0.t
+; ZVFHMIN-NEXT: vmv1r.v v9, v7
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v9, v5, a3
; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a2, a4
; ZVFHMIN-NEXT: .LBB171_6:
-; ZVFHMIN-NEXT: vmv1r.v v0, v7
-; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: mv a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a5, a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a4, a4, a5
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a4, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, a0, a4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: mv a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 1
-; ZVFHMIN-NEXT: add a4, a4, a2
-; ZVFHMIN-NEXT: slli a2, a2, 2
-; ZVFHMIN-NEXT: add a2, a2, a4
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # vscale x 64-byte Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v4, a3
-; ZVFHMIN-NEXT: add a0, a1, a1
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v10, a3
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v8, v9, a1
----------------
wangpc-pp wrote:
Maybe just some scheduling noise.
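For reference, the fold the PR title describes amounts to something like the following fragment inside SelectionDAG::getNode's binary-operator handling (a minimal sketch under assumed surrounding context, not the exact upstream patch; N1/N2 are the two ADD operands):

```cpp
// Sketch: (add (vscale * C0), (vscale * C1)) -> (vscale * (C0 + C1)),
// applied when Opcode == ISD::ADD.
if (N1.getOpcode() == ISD::VSCALE && N2.getOpcode() == ISD::VSCALE) {
  // An ISD::VSCALE node carries its constant multiplier as operand 0.
  const APInt &C0 = N1.getConstantOperandAPInt(0);
  const APInt &C1 = N2.getConstantOperandAPInt(0);
  return getVScale(DL, VT, C0 + C1);
}
```

The csrr/slli/add runs in the diff above materialize such vscale multiples (via vlenb) for scalable stack offsets, so seeing the same reloads reached through differently decomposed shift/add sequences is consistent with the constants being combined earlier plus instruction scheduling, rather than a functional change.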
https://github.com/llvm/llvm-project/pull/144565