[llvm] b011611 - [SLP] add tests for reduction ordering; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 16 10:28:30 PDT 2020
Author: Sanjay Patel
Date: 2020-09-16T13:28:19-04:00
New Revision: b011611e373c3d6dfddde5120ce7974cc8719d4a
URL: https://github.com/llvm/llvm-project/commit/b011611e373c3d6dfddde5120ce7974cc8719d4a
DIFF: https://github.com/llvm/llvm-project/commit/b011611e373c3d6dfddde5120ce7974cc8719d4a.diff
LOG: [SLP] add tests for reduction ordering; NFC
Added:
Modified:
llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index 3ac8c04774a4c..daa96bfa84aef 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -74,3 +74,150 @@ for.end: ; preds = %for.inc
declare i32 @printf(i8* nocapture, ...)
+; PR41312 - the order of the reduction ops should not prevent forming a reduction.
+; The 'wrong' member of the reduction incurs a greater cost if grouped with the
+; other candidates in the reduction because it does not have a matching predicate
+; and/or constant operand.
+
+; Any-of test over the four lanes of %x: returns -1.0 if any compare is true and
+; 1.0 otherwise. The mismatched compare (%cmp3wrong) feeds the first 'or' of the
+; chain. The expected output below is still fully scalar, i.e. no vector
+; reduction is formed for this ordering.
+define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) {
+; CHECK-LABEL: @merge_anyof_v4f32_wrong_first(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2
+; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3
+; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01
+; CHECK-NEXT: [[CMP0:%.*]] = fcmp ogt float [[X0]], 1.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[X1]], 1.000000e+00
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[X2]], 1.000000e+00
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X3]], 1.000000e+00
+; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3WRONG]]
+; CHECK-NEXT: [[OR031:%.*]] = or i1 [[OR03]], [[CMP1]]
+; CHECK-NEXT: [[OR0312:%.*]] = or i1 [[OR031]], [[CMP2]]
+; CHECK-NEXT: [[OR03123:%.*]] = or i1 [[OR0312]], [[CMP3]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03123]], float -1.000000e+00, float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %x0 = extractelement <4 x float> %x, i32 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %cmp3wrong = fcmp olt float %x3, 42.0 ; odd one out: olt against 42.0 rather than ogt against 1.0
+ %cmp0 = fcmp ogt float %x0, 1.0
+ %cmp1 = fcmp ogt float %x1, 1.0
+ %cmp2 = fcmp ogt float %x2, 1.0
+ %cmp3 = fcmp ogt float %x3, 1.0
+ %or03 = or i1 %cmp0, %cmp3wrong ; mismatched compare enters at the first 'or'
+ %or031 = or i1 %or03, %cmp1
+ %or0312 = or i1 %or031, %cmp2
+ %or03123 = or i1 %or0312, %cmp3
+ %r = select i1 %or03123, float -1.0, float 1.0
+ ret float %r
+}
+
+; Same any-of computation as the wrong_first test, but the mismatched compare
+; (%cmp3wrong) feeds the last 'or' of the chain. The expected output below
+; compares all four lanes with one vector 'fcmp ogt' against a splat of 1.0,
+; or-reduces the result, and then ORs in the scalar mismatched compare.
+define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) {
+; CHECK-LABEL: @merge_anyof_v4f32_wrong_last(
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
+; CHECK-NEXT: [[CMP3WRONG:%.*]] = fcmp olt float [[TMP1]], 4.200000e+01
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x float> [[X]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[CMP3WRONG]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP4]], float -1.000000e+00, float 1.000000e+00
+; CHECK-NEXT: ret float [[R]]
+;
+ %x0 = extractelement <4 x float> %x, i32 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %cmp3wrong = fcmp olt float %x3, 42.0 ; odd one out: olt against 42.0 rather than ogt against 1.0
+ %cmp0 = fcmp ogt float %x0, 1.0
+ %cmp1 = fcmp ogt float %x1, 1.0
+ %cmp2 = fcmp ogt float %x2, 1.0
+ %cmp3 = fcmp ogt float %x3, 1.0
+ %or03 = or i1 %cmp0, %cmp3
+ %or031 = or i1 %or03, %cmp1
+ %or0312 = or i1 %or031, %cmp2
+ %or03123 = or i1 %or0312, %cmp3wrong ; mismatched compare enters at the last 'or'
+ %r = select i1 %or03123, float -1.0, float 1.0
+ ret float %r
+}
+
+; Integer any-of test over the four lanes of %x: returns -1 if any compare is
+; true and 1 otherwise. The mismatched compare (%cmp3wrong) feeds the second
+; 'or' of the chain. The expected output below is still fully scalar, i.e. no
+; vector reduction is formed for this ordering.
+define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) {
+; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
+; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
+; CHECK-NEXT: [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42
+; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[X0]], 1
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], 1
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X2]], 1
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X3]], 1
+; CHECK-NEXT: [[OR03:%.*]] = or i1 [[CMP0]], [[CMP3]]
+; CHECK-NEXT: [[OR033:%.*]] = or i1 [[OR03]], [[CMP3WRONG]]
+; CHECK-NEXT: [[OR0332:%.*]] = or i1 [[OR033]], [[CMP2]]
+; CHECK-NEXT: [[OR03321:%.*]] = or i1 [[OR0332]], [[CMP1]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[OR03321]], i32 -1, i32 1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %x0 = extractelement <4 x i32> %x, i32 0
+ %x1 = extractelement <4 x i32> %x, i32 1
+ %x2 = extractelement <4 x i32> %x, i32 2
+ %x3 = extractelement <4 x i32> %x, i32 3
+ %cmp3wrong = icmp slt i32 %x3, 42 ; odd one out: slt against 42 rather than sgt against 1
+ %cmp0 = icmp sgt i32 %x0, 1
+ %cmp1 = icmp sgt i32 %x1, 1
+ %cmp2 = icmp sgt i32 %x2, 1
+ %cmp3 = icmp sgt i32 %x3, 1
+ %or03 = or i1 %cmp0, %cmp3
+ %or033 = or i1 %or03, %cmp3wrong ; mismatched compare enters in the middle of the chain
+ %or0332 = or i1 %or033, %cmp2
+ %or03321 = or i1 %or0332, %cmp1
+ %r = select i1 %or03321, i32 -1, i32 1
+ ret i32 %r
+}
+
+; Variant of the wrong_middle test in which each compare uses a lane of %x
+; against the same lane of %y instead of a constant. The expected output below
+; builds a 4-wide 'icmp sgt' from %cmp0, %cmp3, %cmp3wrong (as y3 > x3, i.e. with
+; its operands swapped) and %cmp2, or-reduces it, and ORs in the scalar %cmp1.
+define i32 @merge_anyof_v4i32_wrong_middle_better_rdx(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle_better_rdx(
+; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
+; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
+; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
+; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
+; CHECK-NEXT: [[Y0:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 0
+; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y]], i32 1
+; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2
+; CHECK-NEXT: [[Y3:%.*]] = extractelement <4 x i32> [[Y]], i32 3
+; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[X1]], [[Y1]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X3]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[Y3]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X2]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> undef, i32 [[Y0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y3]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[X3]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y2]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
+; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[CMP1]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP11]], i32 -1, i32 1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %x0 = extractelement <4 x i32> %x, i32 0
+ %x1 = extractelement <4 x i32> %x, i32 1
+ %x2 = extractelement <4 x i32> %x, i32 2
+ %x3 = extractelement <4 x i32> %x, i32 3
+ %y0 = extractelement <4 x i32> %y, i32 0
+ %y1 = extractelement <4 x i32> %y, i32 1
+ %y2 = extractelement <4 x i32> %y, i32 2
+ %y3 = extractelement <4 x i32> %y, i32 3
+ %cmp3wrong = icmp slt i32 %x3, %y3 ; odd one out: slt where the other compares use sgt
+ %cmp0 = icmp sgt i32 %x0, %y0
+ %cmp1 = icmp sgt i32 %x1, %y1
+ %cmp2 = icmp sgt i32 %x2, %y2
+ %cmp3 = icmp sgt i32 %x3, %y3
+ %or03 = or i1 %cmp0, %cmp3
+ %or033 = or i1 %or03, %cmp3wrong ; mismatched compare enters in the middle of the chain
+ %or0332 = or i1 %or033, %cmp2
+ %or03321 = or i1 %or0332, %cmp1
+ %r = select i1 %or03321, i32 -1, i32 1
+ ret i32 %r
+}
More information about the llvm-commits
mailing list