[llvm] [RISCV] Add DAG combine for (vmv_s_x_vl merge, (and scalar, mask), vl). (PR #131711)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 21:55:48 PDT 2025
https://github.com/yanming123456 updated https://github.com/llvm/llvm-project/pull/131711
From 0fa1659075dec3248da79d6e2eab8966a4daf922 Mon Sep 17 00:00:00 2001
From: yanming <ming.yan at terapines.com>
Date: Tue, 18 Mar 2025 11:11:34 +0800
Subject: [PATCH] [RISCV] Add DAG combine for (vmv_s_x_vl merge, (and scalar,
mask), vl).
If `mask` has at least SEW trailing one bits, combine
(vmv_s_x_vl merge, (and scalar, mask), vl) -> (vmv_s_x_vl merge, scalar, vl).
The vmv.s.x instruction copies the scalar integer register to element 0 of the
destination vector register. If SEW < XLEN, the least-significant bits are copied
and the upper XLEN-SEW bits are ignored.
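To illustrate the condition in isolation (a standalone sketch, not the in-tree
code; `maskCoversSEW` is a name invented here): the AND is redundant exactly
when `mask` keeps the low SEW bits intact, i.e. when it has at least SEW
trailing ones, because vmv.s.x never reads the bits above SEW.

#include <bit>
#include <cstdint>

// Hypothetical helper, for illustration only: (and scalar, mask) feeding
// vmv.s.x can be dropped when mask has >= SEW trailing one bits, since only
// the low SEW bits of the scalar are copied into element 0.
constexpr bool maskCoversSEW(uint64_t Mask, unsigned SEW) {
  return std::countr_one(Mask) >= static_cast<int>(SEW);
}

static_assert(maskCoversSEW(0xFF, 8));    // andi a0, a0, 255 before an e8 vmv.s.x
static_assert(maskCoversSEW(0xFFFF, 16)); // slli/srli zero-extension before e16
static_assert(!maskCoversSEW(0x7F, 8));   // clears bit 7, so the AND must stay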
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +
.../rvv/fixed-vectors-reduction-int-vp.ll | 121 ++++--------
.../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 180 +++++-------------
3 files changed, 88 insertions(+), 221 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27a4bbce1f5fc..48d8fc23dc1bb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19183,6 +19183,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Scalar = N->getOperand(1);
SDValue VL = N->getOperand(2);
+ // The vmv.s.x instruction copies the scalar integer register to element 0
+ // of the destination vector register. If SEW < XLEN, the least-significant
+ // bits are copied and the upper XLEN-SEW bits are ignored.
+ unsigned ScalarSize = Scalar.getValueSizeInBits();
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
+ return SDValue(N, 0);
+
if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
Scalar.getOperand(0).getValueType() == N->getValueType(0))
return Scalar.getOperand(0);
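For context, SimplifyDemandedLowBitsHelper is a local lambda defined earlier
in PerformDAGCombine. Roughly (a sketch of what such a helper looks like, not
a verbatim quote of the in-tree code), it asks SimplifyDemandedBits to rewrite
operand OpNo under the knowledge that only the low LowBits bits are observed:

auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
  SDValue Op = N->getOperand(OpNo);
  // Only the low LowBits bits of Op are demanded; SimplifyDemandedBits can
  // then fold away an (and x, mask) whose mask covers those bits.
  APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
  if (!SimplifyDemandedBits(Op, Mask, DCI))
    return false;
  // Op was replaced; revisit N unless the combine deleted it.
  if (N->getOpcode() != ISD::DELETED_NODE)
    DCI.AddToWorklist(N);
  return true;
};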
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index f920e39e7d295..52dd87068b0c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -24,7 +24,6 @@ declare i8 @llvm.vp.reduce.umax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -55,7 +54,6 @@ declare i8 @llvm.vp.reduce.umin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -131,7 +129,6 @@ declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)
define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v3i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -162,7 +159,6 @@ declare i8 @llvm.vp.reduce.umax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -193,7 +189,6 @@ declare i8 @llvm.vp.reduce.umin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -282,27 +277,14 @@ define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m
declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -325,27 +307,14 @@ define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %
declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_v2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -428,27 +397,14 @@ define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m
declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -471,27 +427,14 @@ define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %
declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
ret i16 %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index eacfce098bddb..7c6782fc1dcd4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -24,7 +24,6 @@ declare i8 @llvm.vp.reduce.umax.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>,
define signext i8 @vpreduce_umax_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -55,7 +54,6 @@ declare i8 @llvm.vp.reduce.umin.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>,
define signext i8 @vpreduce_umin_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_nxv1i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
@@ -146,7 +144,6 @@ declare i8 @llvm.vp.reduce.umax.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>,
define signext i8 @vpreduce_umax_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -177,7 +174,6 @@ declare i8 @llvm.vp.reduce.umin.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>,
define signext i8 @vpreduce_umin_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_nxv2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
@@ -283,7 +279,6 @@ declare i8 @llvm.vp.reduce.umax.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>,
define signext i8 @vpreduce_umax_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umax_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -314,7 +309,6 @@ declare i8 @llvm.vp.reduce.umin.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>,
define signext i8 @vpreduce_umin_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_umin_nxv4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
@@ -403,27 +397,14 @@ define signext i16 @vpreduce_add_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v,
declare i16 @llvm.vp.reduce.umax.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
define signext i16 @vpreduce_umax_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -446,27 +427,14 @@ define signext i16 @vpreduce_smax_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v,
declare i16 @llvm.vp.reduce.umin.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
define signext i16 @vpreduce_umin_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv1i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -549,27 +517,14 @@ define signext i16 @vpreduce_add_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v,
declare i16 @llvm.vp.reduce.umax.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
define signext i16 @vpreduce_umax_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -592,27 +547,14 @@ define signext i16 @vpreduce_smax_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v,
declare i16 @llvm.vp.reduce.umin.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
define signext i16 @vpreduce_umin_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv2i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -695,27 +637,14 @@ define signext i16 @vpreduce_add_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v,
declare i16 @llvm.vp.reduce.umax.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
define signext i16 @vpreduce_umax_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umax_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umax_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umax_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
ret i16 %r
}
@@ -738,27 +667,14 @@ define signext i16 @vpreduce_smax_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v,
declare i16 @llvm.vp.reduce.umin.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
define signext i16 @vpreduce_umin_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_umin_nxv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: srli a0, a0, 16
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_umin_nxv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 48
-; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
-; RV64-NEXT: vmv.x.s a0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_umin_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
%r = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
ret i16 %r
}