[llvm] e69f8ba - [RISCV][NFC] Add test case for SLP reduction vectorization failure
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 21 08:57:59 PDT 2023
Author: Luke Lau
Date: 2023-03-21T15:57:52Z
New Revision: e69f8bac42e5c3e636a1b06c08fc2739ac1d9b36
URL: https://github.com/llvm/llvm-project/commit/e69f8bac42e5c3e636a1b06c08fc2739ac1d9b36
DIFF: https://github.com/llvm/llvm-project/commit/e69f8bac42e5c3e636a1b06c08fc2739ac1d9b36.diff
LOG: [RISCV][NFC] Add test case for SLP reduction vectorization failure
Horizontal reductions still occur on RISC-V, despite the maximum SLP VF
reported back by TTI being 1, to disable SLP.
This can cause the cost model to think it can vectorize a gather into
smaller, widened loads, when it will actually fail to do so.
This should ultimately be fixed whenever SLP is re-enabled for RISC-V at
some point.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D146529
Added:
Modified:
llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
index 268e4f3189d5e..10f9c04892972 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
@@ -821,4 +821,96 @@ entry:
ret i64 %add.15
}
+declare i32 @llvm.abs.i32(i32, i1)
+; FIXME: This horizontal reduction occurs because the cost model thinks it can
+; vectorize the loads here. However, because -riscv-v-slp-max-vf is set to 1 by
+; default, tryToVectorizeList fails and we end up with this very expensive
+; scalarized load.
+;
+; This is the code the cost model thinks it's going to generate, which you can
+; get by passing -riscv-v-slp-max-vf=0
+;
+; define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) #0 {
+; %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride
+; %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride
+; %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1
+; %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1
+; %1 = load <2 x i32>, ptr %p, align 4
+; %2 = load <2 x i32>, ptr %q, align 4
+; %x.2 = load i32, ptr %p.2, align 4
+; %y.2 = load i32, ptr %q.2, align 4
+; %x.3 = load i32, ptr %p.3, align 4
+; %y.3 = load i32, ptr %q.3, align 4
+; %3 = shufflevector <2 x i32> %1, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; %4 = insertelement <4 x i32> %3, i32 %x.2, i32 2
+; %5 = insertelement <4 x i32> %4, i32 %x.3, i32 3
+; %6 = shufflevector <2 x i32> %2, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; %7 = insertelement <4 x i32> %6, i32 %y.2, i32 2
+; %8 = insertelement <4 x i32> %7, i32 %y.3, i32 3
+; %9 = sub <4 x i32> %5, %8
+; %10 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %9, i1 true)
+; %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %10)
+; ret i32 %11
+; }
+define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
+; CHECK-LABEL: @stride_sum_abs_diff(
+; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-NEXT: [[Q_1:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 1
+; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[STRIDE:%.*]]
+; CHECK-NEXT: [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[STRIDE]]
+; CHECK-NEXT: [[P_3:%.*]] = getelementptr inbounds i32, ptr [[P_2]], i64 1
+; CHECK-NEXT: [[Q_3:%.*]] = getelementptr inbounds i32, ptr [[Q_2]], i64 1
+; CHECK-NEXT: [[X_0:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT: [[Y_0:%.*]] = load i32, ptr [[Q]], align 4
+; CHECK-NEXT: [[X_1:%.*]] = load i32, ptr [[P_1]], align 4
+; CHECK-NEXT: [[Y_1:%.*]] = load i32, ptr [[Q_1]], align 4
+; CHECK-NEXT: [[X_2:%.*]] = load i32, ptr [[P_2]], align 4
+; CHECK-NEXT: [[Y_2:%.*]] = load i32, ptr [[Q_2]], align 4
+; CHECK-NEXT: [[X_3:%.*]] = load i32, ptr [[P_3]], align 4
+; CHECK-NEXT: [[Y_3:%.*]] = load i32, ptr [[Q_3]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X_0]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X_1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X_2]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X_3]], i32 3
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[Y_0]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y_1]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[Y_2]], i32 2
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y_3]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
+; CHECK-NEXT: ret i32 [[TMP11]]
+;
+ %x.0 = load i32, ptr %p
+ %y.0 = load i32, ptr %q
+ %sub.0 = sub i32 %x.0, %y.0
+ %abs.0 = tail call i32 @llvm.abs.i32(i32 %sub.0, i1 true)
+
+ %p.1 = getelementptr inbounds i32, ptr %p, i64 1
+ %x.1 = load i32, ptr %p.1
+ %q.1 = getelementptr inbounds i32, ptr %q, i64 1
+ %y.1 = load i32, ptr %q.1
+ %sub.1 = sub i32 %x.1, %y.1
+ %abs.1 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true)
+ %sum.0 = add i32 %abs.0, %abs.1
+
+ %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride
+ %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride
+
+ %x.2 = load i32, ptr %p.2
+ %y.2 = load i32, ptr %q.2
+ %sub.2 = sub i32 %x.2, %y.2
+ %abs.2 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true)
+ %sum.1 = add i32 %sum.0, %abs.2
+
+ %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1
+ %x.3 = load i32, ptr %p.3
+ %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1
+ %y.3 = load i32, ptr %q.3
+ %sub.3 = sub i32 %x.3, %y.3
+ %abs.3 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true)
+ %sum.2 = add i32 %sum.1, %abs.3
+
+ ret i32 %sum.2
+}
More information about the llvm-commits mailing list