[llvm] 6e12711 - [VP][fix] Don't discard masks in reductions
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Wed May 25 06:55:00 PDT 2022
Author: Simon Moll
Date: 2022-05-25T15:54:45+02:00
New Revision: 6e12711081d76fea8dc2f4d9579cfdf379497bbf
URL: https://github.com/llvm/llvm-project/commit/6e12711081d76fea8dc2f4d9579cfdf379497bbf
DIFF: https://github.com/llvm/llvm-project/commit/6e12711081d76fea8dc2f4d9579cfdf379497bbf.diff
LOG: [VP][fix] Don't discard masks in reductions
When expanding VP reductions to non-VP code, the ExpandVectorPredication
pass previously ignored the mask. Fix this by keeping the mask and
selecting the neutral element wherever the mask is zero.
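For illustration, a minimal hand-written sketch (not taken verbatim from the
patch) of what the fixed expansion does for a masked integer add reduction;
the neutral element for add is 0, so masked-off lanes no longer contribute:

    ; before expansion
    %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)

    ; after expansion: masked-off lanes are replaced by the neutral element,
    ; then an unpredicated reduction is applied
    %masked = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer
    %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %masked)
    %r = add i32 %red, %start

When %evl is not known to cover the whole vector, the pass first folds %evl
into the mask (an icmp ult of a lane-index step vector against a splat of
%evl, and'ed with %m), as the updated test checks below show.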
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D126362
Added:
Modified:
llvm/lib/CodeGen/ExpandVectorPredication.cpp
llvm/test/CodeGen/Generic/expand-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 088e683efdbe..aa52914f25b0 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -113,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
OldOp.eraseFromParent();
}
+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+ // The result of VP reductions depends on the mask and evl.
+ if (isa<VPReductionIntrinsic>(VPI))
+ return false;
+ // Fallback to whether the intrinsic is speculatable.
+ // FIXME: Check whether the replacing non-VP code will be speculatable
+ // instead. VP intrinsics themselves are never speculatable because of
+ // UB if %evl is greater than the runtime vector length.
+ return isSafeToSpeculativelyExecute(cast<Operator>(&VPI));
+}
+
//// } Helpers
namespace {
@@ -216,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &VPI) {
- assert((isSafeToSpeculativelyExecute(&VPI) ||
- VPI.canIgnoreVectorLengthParam()) &&
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
"Implicitly dropping %evl in non-speculatable operator!");
auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
@@ -296,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &VPI) {
- assert((isSafeToSpeculativelyExecute(&VPI) ||
- VPI.canIgnoreVectorLengthParam()) &&
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
"Implicitly dropping %evl in non-speculatable operator!");
Value *Mask = VPI.getMaskParam();
@@ -471,9 +480,9 @@ struct TransformJob {
bool isDone() const { return Strategy.shouldDoNothing(); }
};
-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
// Speculatable instructions do not strictly need predication.
- if (isSafeToSpeculativelyExecute(&I)) {
+ if (maySpeculateLanes(VPI)) {
// Converting a speculatable VP intrinsic means dropping %mask and %evl.
// No need to expand %evl into the %mask only to ignore that code.
if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
@@ -518,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() {
if (!VPI)
continue;
auto VPStrat = getVPLegalizationStrategy(*VPI);
- sanitizeStrategy(I, VPStrat);
+ sanitizeStrategy(*VPI, VPStrat);
if (!VPStrat.shouldDoNothing())
Worklist.emplace_back(VPI, VPStrat);
}
diff --git a/llvm/test/CodeGen/Generic/expand-vp.ll b/llvm/test/CodeGen/Generic/expand-vp.ll
index f4dfbdd2e16c..6c8112ea60e2 100644
--- a/llvm/test/CodeGen/Generic/expand-vp.ll
+++ b/llvm/test/CodeGen/Generic/expand-vp.ll
@@ -166,62 +166,70 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
; Check that reductions use the correct neutral element for masked-off elements
; ALL-CONVERT: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer
+; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
+; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
+; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
+; ALL-CONVERT-NEXT: [[ADD:%.+]] = select <4 x i1> [[NEWM]], <4 x i32> %vi, <4 x i32> zeroinitializer
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[ADD]])
; ALL-CONVERT-NEXT: %{{.+}} = add i32 [[RED]], %start
-; ALL-CONVERT-NEXT: [[MUL:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; ALL-CONVERT: [[MUL:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[MUL]])
; ALL-CONVERT-NEXT: %{{.+}} = mul i32 [[RED]], %start
-; ALL-CONVERT-NEXT: [[AND:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; ALL-CONVERT: [[AND:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[AND]])
; ALL-CONVERT-NEXT: %{{.+}} = and i32 [[RED]], %start
-; ALL-CONVERT-NEXT: [[OR:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer
+; ALL-CONVERT: [[OR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[OR]])
; ALL-CONVERT-NEXT: %{{.+}} = or i32 [[RED]], %start
-; ALL-CONVERT-NEXT: [[XOR:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer
+; ALL-CONVERT: [[XOR:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[XOR]])
; ALL-CONVERT-NEXT: %{{.+}} = xor i32 [[RED]], %start
-; ALL-CONVERT-NEXT: [[SMIN:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; ALL-CONVERT: [[SMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SMIN]])
; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smin.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT-NEXT: [[SMAX:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; ALL-CONVERT: [[SMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SMAX]])
; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.smax.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT-NEXT: [[UMIN:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; ALL-CONVERT: [[UMIN:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[UMIN]])
; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umin.i32(i32 [[RED]], i32 %start)
-; ALL-CONVERT-NEXT: [[UMAX:%.+]] = select <4 x i1> %m, <4 x i32> %vi, <4 x i32> zeroinitializer
+; ALL-CONVERT: [[UMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x i32> %vi, <4 x i32> zeroinitializer
; ALL-CONVERT-NEXT: [[RED:%.+]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[UMAX]])
; ALL-CONVERT-NEXT: %{{.+}} = call i32 @llvm.umax.i32(i32 [[RED]], i32 %start)
; ALL-CONVERT-NEXT: ret void
; Check that reductions use the correct neutral element for masked-off elements
; ALL-CONVERT: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+; ALL-CONVERT-NEXT: [[NINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; ALL-CONVERT-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NINS]], <4 x i32> poison, <4 x i32> zeroinitializer
+; ALL-CONVERT-NEXT: [[EVLM:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
+; ALL-CONVERT-NEXT: [[NEWM:%.+]] = and <4 x i1> [[EVLM]], %m
+; ALL-CONVERT-NEXT: [[FMIN:%.+]] = select <4 x i1> [[NEWM]], <4 x float> %vf, <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN]])
; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.minnum.f32(float [[RED]], float %f)
-; ALL-CONVERT-NEXT: [[FMIN_NNAN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
+; ALL-CONVERT: [[FMIN_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN]])
; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.minnum.f32(float [[RED]], float %f)
-; ALL-CONVERT-NEXT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
+; ALL-CONVERT: [[FMIN_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[FMIN_NNAN_NINF]])
; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.minnum.f32(float [[RED]], float %f)
-; ALL-CONVERT-NEXT: [[FMAX:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000>
+; ALL-CONVERT: [[FMAX:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX]])
; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.maxnum.f32(float [[RED]], float %f)
-; ALL-CONVERT-NEXT: [[FMAX_NNAN:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
+; ALL-CONVERT: [[FMAX_NNAN:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN]])
; ALL-CONVERT-NEXT: %{{.+}} = call nnan float @llvm.maxnum.f32(float [[RED]], float %f)
-; ALL-CONVERT-NEXT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000>
+; ALL-CONVERT: [[FMAX_NNAN_NINF:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000, float 0xC7EFFFFFE0000000>
; ALL-CONVERT-NEXT: [[RED:%.+]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[FMAX_NNAN_NINF]])
; ALL-CONVERT-NEXT: %{{.+}} = call nnan ninf float @llvm.maxnum.f32(float [[RED]], float %f)
-; ALL-CONVERT-NEXT: [[FADD:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
+; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]])
-; ALL-CONVERT-NEXT: [[FADD:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
+; ALL-CONVERT: [[FADD:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %f, <4 x float> [[FADD]])
-; ALL-CONVERT-NEXT: [[FMUL:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; ALL-CONVERT-NEXT: %{{.+}} = call float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]])
-; ALL-CONVERT-NEXT: [[FMUL:%.+]] = select <4 x i1> %m, <4 x float> %vf, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; ALL-CONVERT: [[FMUL:%.+]] = select <4 x i1> %{{.+}}, <4 x float> %vf, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; ALL-CONVERT-NEXT: %{{.+}} = call reassoc float @llvm.vector.reduce.fmul.v4f32(float %f, <4 x float> [[FMUL]])
; ALL-CONVERT-NEXT: ret void
@@ -332,29 +340,37 @@ define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n
; DISCARD_LEGAL: ret void
; DISCARD_LEGAL: define void @test_vp_reduce_int_v4(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: ret void
+; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer
+; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
+; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m
+; DISCARD_LEGAL-NEXT: %r0 = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> [[NEWMASK]], i32 4)
+; DISCARD_LEGAL-NOT: %r1 = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r2 = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r3 = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r4 = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r5 = call i32 @llvm.vp.reduce.smin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r6 = call i32 @llvm.vp.reduce.smax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r7 = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r8 = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %vi, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL: ret void
; DISCARD_LEGAL: define void @test_vp_reduce_fp_v4(float %f, <4 x float> %vf, <4 x i1> %m, i32 %n) {
-; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
-; DISCARD_LEGAL-NEXT: ret void
+; DISCARD_LEGAL-NEXT: [[NSPLATINS:%.+]] = insertelement <4 x i32> poison, i32 %n, i32 0
+; DISCARD_LEGAL-NEXT: [[NSPLAT:%.+]] = shufflevector <4 x i32> [[NSPLATINS]], <4 x i32> poison, <4 x i32> zeroinitializer
+; DISCARD_LEGAL-NEXT: [[EVLMASK:%.+]] = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[NSPLAT]]
+; DISCARD_LEGAL-NEXT: [[NEWMASK:%.+]] = and <4 x i1> [[EVLMASK]], %m
+; DISCARD_LEGAL-NEXT: %r0 = call float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> [[NEWMASK]], i32 4)
+; DISCARD_LEGAL-NOT: %r1 = call nnan float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r2 = call nnan ninf float @llvm.vp.reduce.fmin.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r3 = call float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r4 = call nnan float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r5 = call nnan ninf float @llvm.vp.reduce.fmax.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r6 = call float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r7 = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r8 = call float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL-NOT: %r9 = call reassoc float @llvm.vp.reduce.fmul.v4f32(float %f, <4 x float> %vf, <4 x i1> %m, i32 4)
+; DISCARD_LEGAL: ret void
; Convert %evl into %mask everywhere (%evl Convert, %mask Legal)
;