[llvm] [DAGCombiner] Fold umax/umin operations with vscale operands (PR #154461)
Jim Lin via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 00:40:17 PDT 2025
https://github.com/tclin914 updated https://github.com/llvm/llvm-project/pull/154461
>From 79ea0c1b7042c0e6ba0f964514227f10341236e1 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Tue, 19 Aug 2025 17:40:02 +0800
Subject: [PATCH 1/3] [DAGCombiner] Fold umax/umin operations with vscale
operands
If both operands of a umax/umin operation are ISD::VSCALE nodes, the
operation can be constant folded by comparing their constant multipliers,
since both scale the same runtime vscale value.
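As a minimal illustration (hypothetical IR, not taken from this patch),
assuming the constant-multiplied vscale values are selected to ISD::VSCALE
nodes:

  %vs = call i32 @llvm.vscale.i32()
  %x = mul i32 %vs, 8
  %y = mul i32 %vs, 16
  %m = call i32 @llvm.umin.i32(i32 %x, i32 %y)
  ; with this combine, %m folds to %x, since the multiplier 8 < 16

This is roughly the pattern the RVV EVL-splitting tests below produce: the
low half runs with umin(EVL, VLMAX), and when both values are vscale
multiples the umin (and with it the vscale-vs-vscale bltu branch) now
folds away.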
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++
llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 20 +++----
llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 31 ++++-------
llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 31 ++++-------
llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 31 ++++-------
llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 31 ++++-------
llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 31 ++++-------
llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 52 ++++++-------------
8 files changed, 88 insertions(+), 149 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6eb8468e2573e..75d61d281224a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6070,6 +6070,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (N0 == N1)
return N0;
+ // fold operation with vscale operands.
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
+ uint64_t C0 = N0->getConstantOperandVal(0);
+ uint64_t C1 = N1->getConstantOperandVal(0);
+ if (Opcode == ISD::UMAX)
+ return C0 > C1 ? N0 : N1;
+ else if (Opcode == ISD::UMIN)
+ return C0 > C1 ? N1 : N0;
+ }
+
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index ecd098edb30ae..b6aa4affbb10f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -939,21 +939,17 @@ define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, p
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: csrr a1, vlenb
; CHECK-RV32-NEXT: srli a2, a1, 3
-; CHECK-RV32-NEXT: sub a3, a2, a1
-; CHECK-RV32-NEXT: sltu a4, a2, a3
-; CHECK-RV32-NEXT: addi a4, a4, -1
-; CHECK-RV32-NEXT: and a3, a4, a3
-; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
-; CHECK-RV32-NEXT: bltu a2, a1, .LBB61_2
-; CHECK-RV32-NEXT: # %bb.1:
-; CHECK-RV32-NEXT: mv a2, a1
-; CHECK-RV32-NEXT: .LBB61_2:
+; CHECK-RV32-NEXT: sub a1, a2, a1
+; CHECK-RV32-NEXT: sltu a3, a2, a1
+; CHECK-RV32-NEXT: addi a3, a3, -1
+; CHECK-RV32-NEXT: and a1, a3, a1
; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero
; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-RV32-NEXT: vadd.vv v16, v16, v24
-; CHECK-RV32-NEXT: vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v24
+; CHECK-RV32-NEXT: vadd.vv v16, v16, v0
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index 6e9f4d45cd6b4..a29c78270d595 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1393,31 +1393,22 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a2, a0, 2
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: sltu a3, a0, a2
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a2, a3, a2
-; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB120_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB120_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: sub a1, a0, a1
+; CHECK-NEXT: sltu a0, a0, a1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
; CHECK-NEXT: ret
%evl = call i32 @llvm.vscale.i32()
%evl0 = mul i32 %evl, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index ec95e81b8dd94..dd41a0d125a8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -1026,31 +1026,22 @@ define <vscale x 32 x i32> @vmax_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index bd1a6c6e55c70..9490bd5a30d40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -1025,31 +1025,22 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index 6f1f8e1298321..8a750fbe8bf70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -1026,31 +1026,22 @@ define <vscale x 32 x i32> @vmin_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 47e7b7d70c07e..8290cf23f1370 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -1025,31 +1025,22 @@ define <vscale x 32 x i32> @vminu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
declare i32 @llvm.vscale.i32()
-; FIXME: The upper half of the operation is doing nothing.
-; FIXME: The branches comparing vscale vs. vscale should be constant-foldable.
-
-define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: vslidedown.vx v0, v0, a3
-; CHECK-NEXT: sub a3, a1, a2
-; CHECK-NEXT: sltu a4, a1, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: bltu a1, a2, .LBB82_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB82_2:
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: srli a2, a1, 2
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a2
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 522c83fd9fa99..29e8c735654c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -397,48 +397,26 @@ define <vscale x 32 x i32> @select_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) {
+define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) vscale_range(2,1024) {
; CHECK-LABEL: select_evl_nxv32i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: srli a4, a1, 2
-; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: sub a5, a1, a2
-; CHECK-NEXT: vl8re32.v v24, (a3)
-; CHECK-NEXT: sltu a3, a1, a5
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: vl8re32.v v8, (a0)
-; CHECK-NEXT: vslidedown.vx v0, v0, a4
-; CHECK-NEXT: and a3, a3, a5
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: sltu a0, a1, a2
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v0, a1
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT: bltu a1, a2, .LBB28_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB28_2:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
%evl = call i32 @llvm.vscale.i32()
%evl0 = mul i32 %evl, 8
>From ee35f21a7e7f473998d77f65f470af33ab5f7ac5 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 20 Aug 2025 12:59:24 +0800
Subject: [PATCH 2/3] fold -> Fold
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 75d61d281224a..c0a50564d25a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6070,7 +6070,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (N0 == N1)
return N0;
- // fold operation with vscale operands.
+ // Fold operation with vscale operands.
if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
uint64_t C0 = N0->getConstantOperandVal(0);
uint64_t C1 = N1->getConstantOperandVal(0);
>From 737be7d08d4e1d2a031a6631bace4c8afac91e5a Mon Sep 17 00:00:00 2001
From: Jim Lin <jim at andestech.com>
Date: Wed, 20 Aug 2025 15:35:23 +0800
Subject: [PATCH 3/3] Remove `vscale_range(2,1024)`

With the attribute removed, RV32 and RV64 now produce different code for
these tests, so they gain separate RV32/RV64 check prefixes.
---
llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 57 +++++++++-----
llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 57 +++++++++-----
llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 57 +++++++++-----
llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 57 +++++++++-----
llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 57 +++++++++-----
llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 93 +++++++++++++++++------
6 files changed, 268 insertions(+), 110 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index a29c78270d595..946c0bbd7ff6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1393,23 +1393,46 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) vscale_range(2,1024) {
-; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a1, a0, 2
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: slli a1, a0, 1
-; CHECK-NEXT: sub a1, a0, a1
-; CHECK-NEXT: sltu a0, a0, a1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: and a0, a0, a1
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT: ret
+define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
+; RV32-LABEL: vadd_vi_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a1, a0, 2
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: sub a1, a0, a1
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vi_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a2, a0, 2
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a2
+; RV64-NEXT: sub a2, a0, a1
+; RV64-NEXT: sltu a3, a0, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a2, a3, a2
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: vadd.vi v16, v16, -1, v0.t
+; RV64-NEXT: bltu a0, a1, .LBB120_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB120_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV64-NEXT: vadd.vi v8, v8, -1, v0.t
+; RV64-NEXT: ret
%evl = call i32 @llvm.vscale.i32()
%evl0 = mul i32 %evl, 8
%v = call <vscale x 32 x i32> @llvm.vp.add.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> splat (i32 -1), <vscale x 32 x i1> %m, i32 %evl0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index dd41a0d125a8d..d81936354f6f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -1026,23 +1026,46 @@ define <vscale x 32 x i32> @vmax_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
-; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a2, a1, a2
-; CHECK-NEXT: sltu a1, a1, a2
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a1, a1, a2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
-; CHECK-NEXT: ret
+define <vscale x 32 x i32> @vmax_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+; RV32-LABEL: vmax_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmax.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmax.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmax_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmax.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmax.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
%evl = call i32 @llvm.vscale.i32()
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index 9490bd5a30d40..7603bcef1973e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -1025,23 +1025,46 @@ define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
-; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a2, a1, a2
-; CHECK-NEXT: sltu a1, a1, a2
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a1, a1, a2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: ret
+define <vscale x 32 x i32> @vmaxu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+; RV32-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
%evl = call i32 @llvm.vscale.i32()
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index 8a750fbe8bf70..3922b09f1f02d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -1026,23 +1026,46 @@ define <vscale x 32 x i32> @vmin_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
-; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a2, a1, a2
-; CHECK-NEXT: sltu a1, a1, a2
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a1, a1, a2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
-; CHECK-NEXT: ret
+define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+; RV32-LABEL: vmin_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmin.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmin.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vmin_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmin.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmin.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
%evl = call i32 @llvm.vscale.i32()
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 8290cf23f1370..59af953fd52d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -1025,23 +1025,46 @@ define <vscale x 32 x i32> @vminu_vx_nxv32i32_unmasked(<vscale x 32 x i32> %va,
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) vscale_range(2,1024) {
-; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a2, a1, a2
-; CHECK-NEXT: sltu a1, a1, a2
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a1, a1, a2
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
-; CHECK-NEXT: ret
+define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
+; RV32-LABEL: vminu_vx_nxv32i32_evl_nx8:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: srli a2, a1, 2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vminu.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: sltu a1, a1, a2
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vminu.vx v16, v16, a0, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vminu_vx_nxv32i32_evl_nx8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v24, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: srli a3, a1, 2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: vslidedown.vx v0, v0, a3
+; RV64-NEXT: sub a3, a1, a2
+; RV64-NEXT: sltu a4, a1, a3
+; RV64-NEXT: addi a4, a4, -1
+; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vminu.vx v16, v16, a0, v0.t
+; RV64-NEXT: bltu a1, a2, .LBB82_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB82_2:
+; RV64-NEXT: vmv1r.v v0, v24
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vminu.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
%elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
%vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
%evl = call i32 @llvm.vscale.i32()
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 29e8c735654c2..2ed3c9bfe2c16 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zvfhmin,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zvfhmin,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -397,27 +397,70 @@ define <vscale x 32 x i32> @select_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32
declare i32 @llvm.vscale.i32()
-define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) vscale_range(2,1024) {
-; CHECK-LABEL: select_evl_nxv32i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vl8re32.v v24, (a0)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT: slli a2, a1, 3
-; CHECK-NEXT: add a0, a0, a2
-; CHECK-NEXT: slli a2, a1, 1
-; CHECK-NEXT: sub a2, a1, a2
-; CHECK-NEXT: vl8re32.v v24, (a0)
-; CHECK-NEXT: sltu a0, a1, a2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: and a0, a0, a2
-; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT: ret
+define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) {
+; RV32-LABEL: select_evl_nxv32i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vl8re32.v v24, (a0)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v24, v8, v0
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: add a0, a0, a2
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: sub a2, a1, a2
+; RV32-NEXT: vl8re32.v v24, (a0)
+; RV32-NEXT: sltu a0, a1, a2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: srli a1, a1, 2
+; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vmerge.vvm v16, v24, v16, v0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: select_evl_nxv32i32:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv1r.v v7, v0
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a3, a1, 3
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: srli a4, a1, 2
+; RV64-NEXT: add a3, a0, a3
+; RV64-NEXT: sub a5, a1, a2
+; RV64-NEXT: vl8re32.v v24, (a3)
+; RV64-NEXT: sltu a3, a1, a5
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: vl8re32.v v8, (a0)
+; RV64-NEXT: vslidedown.vx v0, v0, a4
+; RV64-NEXT: and a3, a3, a5
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vmerge.vvm v16, v24, v16, v0
+; RV64-NEXT: bltu a1, a2, .LBB28_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a1, a2
+; RV64-NEXT: .LBB28_2:
+; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV64-NEXT: vmerge.vvm v8, v8, v24, v0
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: .cfi_def_cfa sp, 16
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
%evl = call i32 @llvm.vscale.i32()
%evl0 = mul i32 %evl, 8
%v = call <vscale x 32 x i32> @llvm.vp.select.nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c, i32 %evl0)