[llvm] b7bf96a - [LegalizeTypes][VP] Add widening support for vp.reduce.*
Victor Perez via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 18 02:21:13 PST 2022
Author: Victor Perez
Date: 2022-01-18T10:21:01Z
New Revision: b7bf96a258653582a996dbc0225fc4a38def5d3f
URL: https://github.com/llvm/llvm-project/commit/b7bf96a258653582a996dbc0225fc4a38def5d3f
DIFF: https://github.com/llvm/llvm-project/commit/b7bf96a258653582a996dbc0225fc4a38def5d3f.diff
LOG: [LegalizeTypes][VP] Add widening support for vp.reduce.*
When widening these intrinsics, we do not have to insert neutral
elements at the end of the vector, as we must when widening
vector.reduce.* intrinsics, thanks to vector predication semantics.
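Why no neutral elements are needed: in a vp.reduce.* operation a lane
participates only if its index is below the explicit vector length (EVL)
and its mask bit is set, so any lanes gained by widening are never read.
A minimal scalar model of that rule (an illustration, not LLVM code;
this sequential form matches the ordered variant, and the unordered one
merely relaxes the summation order over the same set of lanes):

#include <cstddef>
#include <vector>

double modelVPReduceFAdd(double Start, const std::vector<double> &Vec,
                         const std::vector<bool> &Mask, size_t EVL) {
  double Acc = Start;
  for (size_t I = 0; I < Vec.size(); ++I)
    if (I < EVL && Mask[I]) // Inactive lanes are skipped outright, so no
      Acc += Vec[I];        // neutral element is ever materialized.
  return Acc;
}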
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D117467
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 7928431f1fea..fa158e2e766a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -905,6 +905,23 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
}
void SetWidenedVector(SDValue Op, SDValue Result);
+ /// Given a mask Mask, returns the larger vector into which Mask was widened.
+ SDValue GetWidenedMask(SDValue Mask, ElementCount EC) {
+ // For VP operations, we must also widen the mask. Note that the mask type
+ // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle
+ // the case where the mask needs widening to an identically-sized type as
+ // the vector inputs.
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() == EC &&
+ "Unable to widen binary VP op");
+ return Mask;
+ }
+
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
@@ -964,6 +981,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
+ SDValue WidenVecOp_VP_REDUCE(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 04158aaffe11..becbbb7bb874 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3445,20 +3445,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
- // For VP operations, we must also widen the mask. Note that the mask type
- // may not actually need widening, leading it to be split along with the VP
- // operation.
- // FIXME: This could lead to an infinite split/widen loop. We only handle the
- // case where the mask needs widening to an identically-sized type as the
- // vector inputs.
- SDValue Mask = N->getOperand(2);
- assert(getTypeAction(Mask.getValueType()) ==
- TargetLowering::TypeWidenVector &&
- "Unable to widen binary VP op");
- Mask = GetWidenedVector(Mask);
- assert(Mask.getValueType().getVectorElementCount() ==
- WidenVT.getVectorElementCount() &&
- "Unable to widen binary VP op");
+ SDValue Mask =
+ GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount());
return DAG.getNode(N->getOpcode(), dl, WidenVT,
{InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
@@ -4978,6 +4966,23 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SEQ_FMUL:
Res = WidenVecOp_VECREDUCE_SEQ(N);
break;
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ Res = WidenVecOp_VP_REDUCE(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -5571,6 +5576,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
+ assert(N->isVPOpcode() && "Expected VP opcode");
+
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(1));
+ SDValue Mask = GetWidenedMask(N->getOperand(2),
+ Op.getValueType().getVectorElementCount());
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0),
+ {N->getOperand(0), Op, Mask, N->getOperand(3)},
+ N->getFlags());
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// This only gets called in the case that the left and right inputs and
// result are of a legal odd vector type, and the condition is illegal i1 of
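As a self-contained check of the equivalence WidenVecOp_VP_REDUCE relies
on (a sketch reusing the scalar model above; names are illustrative):
widening a 3-element reduction to 4 elements with arbitrary tail
contents, while keeping the same mask semantics and EVL, leaves the
result unchanged.

#include <cassert>
#include <cstddef>
#include <vector>

static double modelVPReduceFAdd(double Start, const std::vector<double> &V,
                                const std::vector<bool> &M, size_t EVL) {
  double Acc = Start;
  for (size_t I = 0; I < V.size(); ++I)
    if (I < EVL && M[I])
      Acc += V[I];
  return Acc;
}

int main() {
  std::vector<double> V3 = {1.0, 2.0, 3.0};
  std::vector<bool> M3 = {true, false, true};
  // The widened forms: lane 3 holds garbage data and an arbitrary mask bit.
  std::vector<double> V4 = {1.0, 2.0, 3.0, 12345.0};
  std::vector<bool> M4 = {true, false, true, true};
  for (size_t EVL = 0; EVL <= 3; ++EVL)
    assert(modelVPReduceFAdd(5.0, V3, M3, EVL) ==
           modelVPReduceFAdd(5.0, V4, M4, EVL));
  return 0;
}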
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
index 7ac93c381c74..3410a1a83616 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -210,6 +210,34 @@ define double @vpreduce_ord_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m,
ret double %r
}
+declare double @llvm.vp.reduce.fadd.v3f64(double, <3 x double>, <3 x i1>, i32)
+
+define double @vpreduce_fadd_v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fadd_v3f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: ret
+ %r = call reassoc double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
+ ret double %r
+}
+
+define double @vpreduce_ord_fadd_v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_ord_fadd_v3f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v10
+; CHECK-NEXT: ret
+ %r = call double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
+ ret double %r
+}
+
declare double @llvm.vp.reduce.fadd.v4f64(double, <4 x double>, <4 x i1>, i32)
define double @vpreduce_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 zeroext %evl) {
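The two v3f64 tests above also show why both fadd flavors exist: the
reassoc call lowers to vfredusum (reassociation allowed), the plain call
to vfredosum (strictly ordered). Floating-point addition is not
associative, so the distinction is observable; a tiny standalone
demonstration:

#include <cstdio>

int main() {
  double A = 1e17, B = -1e17, C = 1.0;
  double Ordered = (A + B) + C; // 1.0: the cancellation happens first
  double Reassoc = A + (B + C); // 0.0: C is absorbed into B, then cancelled
  std::printf("%g vs %g\n", Ordered, Reassoc);
  return 0;
}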
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index bdde15127f43..405d7aa6b98f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -126,6 +126,22 @@ define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i3
ret i8 %r
}
+declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)
+
+define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_umin_v3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andi a0, a0, 255
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, mu
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
+ %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl)
+ ret i8 %r
+}
+
declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)
define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
@@ -831,17 +847,17 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1>
; CHECK: # %bb.0:
; CHECK-NEXT: addi a3, a1, -32
; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bltu a1, a3, .LBB48_2
+; CHECK-NEXT: bltu a1, a3, .LBB49_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: .LBB48_2:
+; CHECK-NEXT: .LBB49_2:
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vslidedown.vi v24, v0, 4
-; CHECK-NEXT: bltu a1, a3, .LBB48_4
+; CHECK-NEXT: bltu a1, a3, .LBB49_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: .LBB48_4:
+; CHECK-NEXT: .LBB49_4:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vmv.s.x v25, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
index 67f2eed33ffd..59ebdcdbc048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
@@ -212,6 +212,23 @@ define signext i1 @vpreduce_xor_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i3
ret i1 %r
}
+declare i1 @llvm.vp.reduce.and.v10i1(i1, <10 x i1>, <10 x i1>, i32)
+
+define signext i1 @vpreduce_and_v10i1(i1 signext %s, <10 x i1> %v, <10 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_and_v10i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT: vmnand.mm v9, v0, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vcpop.m a1, v9, v0.t
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: and a0, a1, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %r = call i1 @llvm.vp.reduce.and.v10i1(i1 %s, <10 x i1> %v, <10 x i1> %m, i32 %evl)
+ ret i1 %r
+}
+
declare i1 @llvm.vp.reduce.and.v16i1(i1, <16 x i1>, <16 x i1>, i32)
define signext i1 @vpreduce_and_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, i32 zeroext %evl) {
@@ -237,20 +254,20 @@ define signext i1 @vpreduce_and_v256i1(i1 signext %s, <256 x i1> %v, <256 x i1>
; CHECK-NEXT: addi a2, a1, -128
; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bltu a1, a2, .LBB13_2
+; CHECK-NEXT: bltu a1, a2, .LBB14_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: .LBB14_2:
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
; CHECK-NEXT: vmnand.mm v8, v8, v8
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vcpop.m a2, v8, v0.t
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: seqz a2, a2
-; CHECK-NEXT: bltu a1, a3, .LBB13_4
+; CHECK-NEXT: bltu a1, a3, .LBB14_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a1, 128
-; CHECK-NEXT: .LBB13_4:
+; CHECK-NEXT: .LBB14_4:
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vmnand.mm v8, v11, v11
; CHECK-NEXT: vmv1r.v v0, v9
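The vcpop-based sequence in the new vpreduce_and_v10i1 test reads most
easily through a scalar model (a sketch; the comments map to the CHECK
lines above): an AND reduction over the active i1 lanes is true exactly
when no active lane is zero, which the lowering computes by popcounting
the inverted vector under the mask.

#include <cstddef>
#include <cstdint>
#include <vector>

int64_t modelVPReduceAndI1(bool Start, const std::vector<bool> &V,
                           const std::vector<bool> &M, size_t EVL) {
  // vmnand.mm v9, v0, v0 ; vcpop.m a1, v9, v0.t -- count active zero lanes.
  size_t ZerosUnderMask = 0;
  for (size_t I = 0; I < EVL && I < V.size(); ++I)
    if (M[I] && !V[I])
      ++ZerosUnderMask;
  // seqz ; and ; neg -- fold in the start value, sign-extend the i1 result.
  bool R = (ZerosUnderMask == 0) && Start;
  return R ? -1 : 0;
}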
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index ec5839fd0d8a..6cd3478cae76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -300,6 +300,35 @@ define double @vpreduce_ord_fadd_nxv2f64(double %s, <vscale x 2 x double> %v, <v
ret double %r
}
+declare double @llvm.vp.reduce.fadd.nxv3f64(double, <vscale x 3 x double>, <vscale x 3 x i1>, i32)
+
+define double @vpreduce_fadd_nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fadd_nxv3f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v12, fa0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v12
+; CHECK-NEXT: ret
+ %r = call reassoc double @llvm.vp.reduce.fadd.nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 %evl)
+ ret double %r
+}
+
+define double @vpreduce_ord_fadd_nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_ord_fadd_nxv3f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT: vfmv.s.f v12, fa0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t
+; CHECK-NEXT: vfmv.f.s fa0, v12
+; CHECK-NEXT: ret
+ %r = call double @llvm.vp.reduce.fadd.nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 %evl)
+ ret double %r
+}
+
+
declare double @llvm.vp.reduce.fadd.nxv4f64(double, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
define double @vpreduce_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index cb0703706e73..997a8d67de71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -248,6 +248,21 @@ define signext i8 @vpreduce_xor_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vsc
ret i8 %r
}
+declare i8 @llvm.vp.reduce.smax.nxv3i8(i8, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
+
+define signext i8 @vpreduce_smax_nxv3i8(i8 signext %s, <vscale x 3 x i8> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_smax_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, mu
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
+; CHECK-NEXT: vmv.x.s a0, v9
+; CHECK-NEXT: ret
+ %r = call i8 @llvm.vp.reduce.smax.nxv3i8(i8 %s, <vscale x 3 x i8> %v, <vscale x 3 x i1> %m, i32 %evl)
+ ret i8 %r
+}
+
declare i8 @llvm.vp.reduce.add.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
define signext i8 @vpreduce_add_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -1144,10 +1159,10 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
; RV32-NEXT: slli a3, a3, 1
; RV32-NEXT: vmv.s.x v25, a0
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: bltu a1, a3, .LBB66_2
+; RV32-NEXT: bltu a1, a3, .LBB67_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a3
-; RV32-NEXT: .LBB66_2:
+; RV32-NEXT: .LBB67_2:
; RV32-NEXT: li a4, 0
; RV32-NEXT: vsetvli a5, zero, e8, mf2, ta, mu
; RV32-NEXT: vslidedown.vx v24, v0, a2
@@ -1157,10 +1172,10 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV32-NEXT: sub a0, a1, a3
; RV32-NEXT: vmv.s.x v8, a2
-; RV32-NEXT: bltu a1, a0, .LBB66_4
+; RV32-NEXT: bltu a1, a0, .LBB67_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a4, a0
-; RV32-NEXT: .LBB66_4:
+; RV32-NEXT: .LBB67_4:
; RV32-NEXT: vsetvli zero, a4, e32, m8, tu, mu
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: vredmaxu.vs v8, v16, v8, v0.t
@@ -1175,10 +1190,10 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
; RV64-NEXT: slli a0, a3, 1
; RV64-NEXT: srli a3, a4, 32
; RV64-NEXT: mv a4, a1
-; RV64-NEXT: bltu a1, a0, .LBB66_2
+; RV64-NEXT: bltu a1, a0, .LBB67_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a4, a0
-; RV64-NEXT: .LBB66_2:
+; RV64-NEXT: .LBB67_2:
; RV64-NEXT: li a5, 0
; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
; RV64-NEXT: vslidedown.vx v24, v0, a6
@@ -1190,10 +1205,10 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV64-NEXT: sub a0, a1, a0
; RV64-NEXT: vmv.s.x v8, a2
-; RV64-NEXT: bltu a1, a0, .LBB66_4
+; RV64-NEXT: bltu a1, a0, .LBB67_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a5, a0
-; RV64-NEXT: .LBB66_4:
+; RV64-NEXT: .LBB67_4:
; RV64-NEXT: vsetvli zero, a5, e32, m8, tu, mu
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vredmaxu.vs v8, v16, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
index 86e8eaa601e4..bd4a0b038724 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
@@ -314,6 +314,24 @@ define signext i1 @vpreduce_xor_nxv32i1(i1 signext %s, <vscale x 32 x i1> %v, <v
ret i1 %r
}
+declare i1 @llvm.vp.reduce.or.nxv40i1(i1, <vscale x 40 x i1>, <vscale x 40 x i1>, i32)
+
+define signext i1 @vpreduce_or_nxv40i1(i1 signext %s, <vscale x 40 x i1> %v, <vscale x 40 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_or_nxv40i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vcpop.m a1, v9, v0.t
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: or a0, a1, a0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %r = call i1 @llvm.vp.reduce.or.nxv40i1(i1 %s, <vscale x 40 x i1> %v, <vscale x 40 x i1> %m, i32 %evl)
+ ret i1 %r
+}
+
declare i1 @llvm.vp.reduce.and.nxv64i1(i1, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
define signext i1 @vpreduce_and_nxv64i1(i1 signext %s, <vscale x 64 x i1> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
@@ -349,6 +367,23 @@ define signext i1 @vpreduce_or_nxv64i1(i1 signext %s, <vscale x 64 x i1> %v, <vs
ret i1 %r
}
+declare i1 @llvm.vp.reduce.xor.nxv64i1(i1, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define signext i1 @vpreduce_xor_nxv64i1(i1 signext %s, <vscale x 64 x i1> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_xor_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vcpop.m a1, v9, v0.t
+; CHECK-NEXT: xor a0, a1, a0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %r = call i1 @llvm.vp.reduce.xor.nxv64i1(i1 %s, <vscale x 64 x i1> %v, <vscale x 64 x i1> %m, i32 %evl)
+ ret i1 %r
+}
+
declare i1 @llvm.vp.reduce.or.nxv128i1(i1, <vscale x 128 x i1>, <vscale x 128 x i1>, i32)
define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, <vscale x 128 x i1> %v, <vscale x 128 x i1> %m, i32 zeroext %evl) {
@@ -358,10 +393,10 @@ define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, <vscale x 128 x i1> %v, <
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: bltu a1, a2, .LBB20_2
+; CHECK-NEXT: bltu a1, a2, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v9
@@ -369,10 +404,10 @@ define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, <vscale x 128 x i1> %v, <
; CHECK-NEXT: snez a3, a3
; CHECK-NEXT: sub a2, a1, a2
; CHECK-NEXT: or a0, a3, a0
-; CHECK-NEXT: bltu a1, a2, .LBB20_4
+; CHECK-NEXT: bltu a1, a2, .LBB22_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: .LBB20_4:
+; CHECK-NEXT: .LBB22_4:
; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vcpop.m a1, v8, v0.t
@@ -384,20 +419,3 @@ define signext i1 @vpreduce_or_nxv128i1(i1 signext %s, <vscale x 128 x i1> %v, <
%r = call i1 @llvm.vp.reduce.or.nxv128i1(i1 %s, <vscale x 128 x i1> %v, <vscale x 128 x i1> %m, i32 %evl)
ret i1 %r
}
-
-declare i1 @llvm.vp.reduce.xor.nxv64i1(i1, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
-
-define signext i1 @vpreduce_xor_nxv64i1(i1 signext %s, <vscale x 64 x i1> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vpreduce_xor_nxv64i1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vcpop.m a1, v9, v0.t
-; CHECK-NEXT: xor a0, a1, a0
-; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: neg a0, a0
-; CHECK-NEXT: ret
- %r = call i1 @llvm.vp.reduce.xor.nxv64i1(i1 %s, <vscale x 64 x i1> %v, <vscale x 64 x i1> %m, i32 %evl)
- ret i1 %r
-}
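The relabeled branches in the wide tests (vpreduce_and_v256i1,
vpreduce_or_nxv128i1, vpreduce_umax_nxv32i32) are untouched logic: only
the .LBB numbers shift because new functions were added ahead of them.
What those tests exercise is splitting rather than widening; a scalar
sketch of the EVL clamping that their bltu/mv sequences implement
(illustrative only):

#include <algorithm>
#include <cstddef>
#include <vector>

bool modelSplitVPReduceOrI1(bool Start, const std::vector<bool> &V,
                            const std::vector<bool> &M, size_t EVL) {
  size_t Half = V.size() / 2;
  size_t LoEVL = std::min(EVL, Half);         // clamp low half to Half
  size_t HiEVL = EVL > Half ? EVL - Half : 0; // clamp high half at zero
  bool R = Start;
  for (size_t I = 0; I < LoEVL; ++I)
    R = R || (M[I] && V[I]);
  for (size_t I = 0; I < HiEVL && Half + I < V.size(); ++I)
    R = R || (M[Half + I] && V[Half + I]);
  return R;
}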