[llvm] [RISCV] Set the exact flag on the SRL created for converting vscale to a read of vlenb. (PR #144571)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 17 15:05:56 PDT 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/144571
>From 2b7b12f3d508e4d5e91e7eff500b394a9ede3871 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 17 Jun 2025 10:40:55 -0700
Subject: [PATCH 1/3] [RISCV] Set the exact flag on the SRL created for
converting vscale to a read of vlenb.
We know that vlenb is a multiple of RVVBytesPerBlock so we aren't
shifting out any non-zero bits.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 +-
.../CodeGen/RISCV/rvv/extract-subvector.ll | 12 +-
.../CodeGen/RISCV/rvv/get_vector_length.ll | 24 +--
.../CodeGen/RISCV/rvv/legalize-load-sdnode.ll | 12 +-
.../RISCV/rvv/legalize-store-sdnode.ll | 6 +-
llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 15 +-
.../CodeGen/RISCV/rvv/vector-deinterleave.ll | 155 +++++++++---------
.../RISCV/rvv/vreductions-fp-sdnode.ll | 42 ++---
8 files changed, 128 insertions(+), 143 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 779786fa400fc..a6679d726622c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7372,8 +7372,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
DAG.getConstant(Val / 8, DL, XLenVT));
} else {
+ SDNodeFlags Flags;
+ Flags.setExact(true);
SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
- DAG.getConstant(3, DL, XLenVT));
+ DAG.getConstant(3, DL, XLenVT),
+ Flags);
Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
DAG.getConstant(Val, DL, XLenVT));
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 83637e4a71d45..d42c42c7ce036 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -290,8 +290,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_6(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_6:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a0
@@ -314,8 +313,7 @@ define <vscale x 2 x i8> @extract_nxv32i8_nxv2i8_22(<vscale x 32 x i8> %vec) {
; CHECK-LABEL: extract_nxv32i8_nxv2i8_22:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v10, a0
@@ -341,9 +339,9 @@ define <vscale x 1 x i8> @extract_nxv4i8_nxv1i8_3(<vscale x 4 x i8> %vec) {
; CHECK-LABEL: extract_nxv4i8_nxv1i8_3:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
index bd0fecd285515..aea688f03cf72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -257,9 +257,9 @@ define i32 @vector_length_vf3_i32(i32 zeroext %tc) {
; RV32-LABEL: vector_length_vf3_i32:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a1, a1, 3
-; RV32-NEXT: slli a2, a1, 1
-; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: srli a2, a1, 3
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: bltu a0, a1, .LBB22_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
@@ -270,9 +270,9 @@ define i32 @vector_length_vf3_i32(i32 zeroext %tc) {
; RV64: # %bb.0:
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a1, a1, 3
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: srli a2, a1, 3
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: bltu a0, a1, .LBB22_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
@@ -286,9 +286,9 @@ define i32 @vector_length_vf3_XLen(iXLen zeroext %tc) {
; RV32-LABEL: vector_length_vf3_XLen:
; RV32: # %bb.0:
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: srli a1, a1, 3
-; RV32-NEXT: slli a2, a1, 1
-; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: srli a2, a1, 3
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: bltu a0, a1, .LBB23_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a1
@@ -299,9 +299,9 @@ define i32 @vector_length_vf3_XLen(iXLen zeroext %tc) {
; RV64: # %bb.0:
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: srli a1, a1, 3
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: srli a2, a1, 3
+; RV64-NEXT: srli a1, a1, 2
+; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: bltu a0, a1, .LBB23_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
index e9e1303d10768..f847ccafefdaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
@@ -8,9 +8,9 @@ define <vscale x 3 x i8> @load_nxv3i8(ptr %ptr) {
; CHECK-LABEL: load_nxv3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
@@ -22,9 +22,9 @@ define <vscale x 5 x half> @load_nxv5f16(ptr %ptr) {
; CHECK-LABEL: load_nxv5f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: slli a2, a1, 2
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: srli a1, a1, 1
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
index 77438ee53b634..03b84ec177ee9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
@@ -8,9 +8,9 @@ define void @store_nxv3i8(<vscale x 3 x i8> %val, ptr %ptr) {
; CHECK-LABEL: store_nxv3i8:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a1, a1, 3
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: srli a2, a1, 3
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index aef46e1f5cf1b..bfd7fc5d04cd6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -2300,10 +2300,9 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-RV64-NEXT: li a2, 0
; CHECK-RV64-NEXT: j .LBB98_5
; CHECK-RV64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-RV64-NEXT: slli a2, a2, 2
-; CHECK-RV64-NEXT: negw a2, a2
-; CHECK-RV64-NEXT: andi a2, a2, 256
; CHECK-RV64-NEXT: srli a3, a4, 1
+; CHECK-RV64-NEXT: negw a2, a3
+; CHECK-RV64-NEXT: andi a2, a2, 256
; CHECK-RV64-NEXT: slli a4, a4, 1
; CHECK-RV64-NEXT: mv a5, a0
; CHECK-RV64-NEXT: mv a6, a2
@@ -2395,10 +2394,9 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-ZVKB-NOZBB64-NEXT: li a2, 0
; CHECK-ZVKB-NOZBB64-NEXT: j .LBB98_5
; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-ZVKB-NOZBB64-NEXT: slli a2, a2, 2
-; CHECK-ZVKB-NOZBB64-NEXT: negw a2, a2
-; CHECK-ZVKB-NOZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-NOZBB64-NEXT: srli a3, a4, 1
+; CHECK-ZVKB-NOZBB64-NEXT: negw a2, a3
+; CHECK-ZVKB-NOZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-NOZBB64-NEXT: slli a4, a4, 1
; CHECK-ZVKB-NOZBB64-NEXT: mv a5, a0
; CHECK-ZVKB-NOZBB64-NEXT: mv a6, a2
@@ -2489,10 +2487,9 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-ZVKB-ZBB64-NEXT: li a2, 0
; CHECK-ZVKB-ZBB64-NEXT: j .LBB98_5
; CHECK-ZVKB-ZBB64-NEXT: .LBB98_2: # %vector.ph
-; CHECK-ZVKB-ZBB64-NEXT: slli a2, a2, 2
-; CHECK-ZVKB-ZBB64-NEXT: negw a2, a2
-; CHECK-ZVKB-ZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-ZBB64-NEXT: srli a3, a4, 1
+; CHECK-ZVKB-ZBB64-NEXT: negw a2, a3
+; CHECK-ZVKB-ZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-ZBB64-NEXT: slli a4, a4, 1
; CHECK-ZVKB-ZBB64-NEXT: mv a5, a0
; CHECK-ZVKB-ZBB64-NEXT: mv a6, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 6a08f5a28a295..75f92c86ff09f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -477,27 +477,26 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v8, v0
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: srli a1, a0, 1
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a1
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: sub a0, a0, a1
-; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a0
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
; CHECK-NEXT: vs8r.v v16, (a0)
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -606,11 +605,9 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: srli a2, a0, 1
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a2, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
@@ -836,39 +833,37 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
-; CHECK-NEXT: srli a2, a0, 2
-; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: srli a3, a0, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT: srli a1, a0, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: srli a2, a0, 1
+; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a3
-; CHECK-NEXT: srli a3, a0, 3
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: sub a0, a0, a3
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v9, a2
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a0
-; CHECK-NEXT: vs8r.v v16, (a1)
+; CHECK-NEXT: vs8r.v v16, (a2)
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a2
+; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg6e8.v v16, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg6e8.v v16, (a2)
; CHECK-NEXT: vlseg6e8.v v10, (a0)
; CHECK-NEXT: vmv2r.v v8, v16
; CHECK-NEXT: vmv2r.v v22, v18
@@ -1068,36 +1063,35 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
+; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: srli a2, a0, 1
-; CHECK-NEXT: srli a3, a0, 3
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a2
-; CHECK-NEXT: sub a0, a0, a3
-; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
+; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: srli a0, a0, 1
+; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v22, v12, 1, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v9, a0
+; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v20, v12, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v9, a1
+; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v18, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v14, v12, 1, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: vmv1r.v v10, v15
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v24, v12, 1, v0
; CHECK-NEXT: vmv1r.v v11, v24
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a2
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a0
; CHECK-NEXT: vmv1r.v v8, v23
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vmv1r.v v9, v14
@@ -1339,49 +1333,48 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
+; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: srli a2, a0, 1
-; CHECK-NEXT: srli a3, a0, 3
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: slli a3, a3, 1
-; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
-; CHECK-NEXT: vsetvli a4, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v9, a2
-; CHECK-NEXT: sub a0, a0, a3
+; CHECK-NEXT: sub a2, a0, a1
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v9, a0
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v22, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v20, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: vmerge.vim v16, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a1
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vslidedown.vx v0, v9, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v18, v10, 1, v0
; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a2
-; CHECK-NEXT: vs8r.v v16, (a1)
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vsetvli a3, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v30, v10, 1, v0
+; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v8, a0
+; CHECK-NEXT: vs8r.v v16, (a2)
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v28, v10, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v24, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, a1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v30, v10, 1, v0
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v26, v10, 1, v0
; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg8e8.v v18, (a1)
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; CHECK-NEXT: vlseg8e8.v v18, (a2)
; CHECK-NEXT: vlseg8e8.v v10, (a0)
; CHECK-NEXT: vmv2r.v v8, v18
; CHECK-NEXT: vmv2r.v v26, v20
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 3da04eb7e6abe..78aae96242fd3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -887,9 +887,9 @@ define half @vreduce_ord_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv3f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
@@ -906,8 +906,7 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv6f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
@@ -925,8 +924,7 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv10f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
@@ -944,9 +942,8 @@ define half @vreduce_ord_fadd_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv12f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 2
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: srli a1, a0, 1
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
@@ -965,9 +962,9 @@ define half @vreduce_fadd_nxv3f16(<vscale x 3 x half> %v, half %s) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: lui a1, 1048568
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
@@ -984,8 +981,7 @@ define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: lui a1, 1048568
; CHECK-NEXT: vmv.s.x v11, a1
@@ -1002,13 +998,12 @@ declare half @llvm.vector.reduce.fmin.nxv10f16(<vscale x 10 x half>)
define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
; CHECK-LABEL: vreduce_fmin_nxv10f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: lui a1, %hi(.LCPI73_0)
-; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI73_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI73_0)
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v12, (a1)
-; CHECK-NEXT: srli a1, a0, 3
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfredmin.vs v12, v8, v12
@@ -1024,9 +1019,8 @@ define half @vreduce_fmax_nxv12f16(<vscale x 12 x half> %v) {
; CHECK-LABEL: vreduce_fmax_nxv12f16:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: slli a1, a0, 2
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: srli a1, a0, 1
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: li a1, -512
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
>From 82a270056dd9e246de233cea6fcc5c12a205437d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 17 Jun 2025 10:49:17 -0700
Subject: [PATCH 2/3] fixup! clang-format
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a6679d726622c..4c1bedd5940e4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7375,8 +7375,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDNodeFlags Flags;
Flags.setExact(true);
SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
- DAG.getConstant(3, DL, XLenVT),
- Flags);
+ DAG.getConstant(3, DL, XLenVT), Flags);
Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
DAG.getConstant(Val, DL, XLenVT));
}
>From f9f69e7cf0997a25251b0d097314279f393f1f13 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 17 Jun 2025 15:05:40 -0700
Subject: [PATCH 3/3] fixup! Add to the earlier SRL for the power 2 case too.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4c1bedd5940e4..138e0e134dfa6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7360,12 +7360,15 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
uint64_t Val = Op.getConstantOperandVal(0);
if (isPowerOf2_64(Val)) {
uint64_t Log2 = Log2_64(Val);
- if (Log2 < 3)
+ if (Log2 < 3) {
+ SDNodeFlags Flags;
+ Flags.setExact(true);
- Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
- DAG.getConstant(3 - Log2, DL, VT));
+ Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
+ DAG.getConstant(3 - Log2, DL, VT), Flags);
- else if (Log2 > 3)
+ } else if (Log2 > 3) {
Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
DAG.getConstant(Log2 - 3, DL, XLenVT));
+ }
} else if ((Val % 8) == 0) {
// If the multiplier is a multiple of 8, scale it down to avoid needing
// to shift the VLENB value.
More information about the llvm-commits
mailing list