[llvm] 28a3fc3 - [SLP][AArch64] Add test case for vectorization regression case reported on D134605
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 30 03:08:09 PDT 2022
Author: Simon Pilgrim
Date: 2022-09-30T11:07:54+01:00
New Revision: 28a3fc39a6e9c1f742611eff53394b13f9bdcf3a
URL: https://github.com/llvm/llvm-project/commit/28a3fc39a6e9c1f742611eff53394b13f9bdcf3a
DIFF: https://github.com/llvm/llvm-project/commit/28a3fc39a6e9c1f742611eff53394b13f9bdcf3a.diff
LOG: [SLP][AArch64] Add test case for vectorization regression case reported on D134605
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll
new file mode 100644
index 0000000000000..833374d9f6104
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=arm64-apple-macosx11.0.0 -slp-vectorizer -S < %s | FileCheck %s
+
+; TODO: Test case reported on D134605 where the vectorization was causing a slowdown due to an underestimation in the cost of the extractions.
+
+define fastcc i64 @zot(float %arg, float %arg1, float %arg2, float %arg3, float %arg4, ptr %arg5, i1 %arg6, i1 %arg7, i1 %arg8) {
+; CHECK-LABEL: @zot(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[ARG:%.*]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <2 x float> zeroinitializer, [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[ARG3:%.*]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[ARG3]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[ARG3]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[TMP6]], <float 2.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[TMP3]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: br i1 [[ARG6:%.*]], label [[BB18:%.*]], label [[BB57:%.*]]
+; CHECK: bb18:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x float> [ [[TMP8]], [[BB:%.*]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x float> [ [[TMP7]], [[BB]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
+; CHECK-NEXT: [[VAL23:%.*]] = fmul fast float [[TMP11]], 2.000000e+00
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
+; CHECK-NEXT: [[VAL24:%.*]] = fmul fast float [[TMP12]], 3.000000e+00
+; CHECK-NEXT: br i1 [[ARG7:%.*]], label [[BB25:%.*]], label [[BB57]]
+; CHECK: bb25:
+; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x float> [ [[TMP9]], [[BB18]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x float> [ [[TMP10]], [[BB18]] ]
+; CHECK-NEXT: br label [[BB30:%.*]]
+; CHECK: bb30:
+; CHECK-NEXT: [[VAL31:%.*]] = phi float [ [[VAL55:%.*]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ]
+; CHECK-NEXT: [[VAL32:%.*]] = phi float [ [[TMP27:%.*]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ]
+; CHECK-NEXT: [[VAL38:%.*]] = getelementptr inbounds i8, ptr [[ARG5:%.*]], i64 2
+; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i8>, ptr [[ARG5]], align 1
+; CHECK-NEXT: [[TMP16:%.*]] = uitofp <2 x i8> [[TMP15]] to <2 x float>
+; CHECK-NEXT: [[TMP17:%.*]] = fsub fast <2 x float> [[TMP16]], [[TMP1]]
+; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x float> [[TMP17]], [[TMP14]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP18]], i32 1
+; CHECK-NEXT: [[VAL50:%.*]] = fadd fast float [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = load <2 x i8>, ptr [[VAL38]], align 1
+; CHECK-NEXT: [[TMP22:%.*]] = uitofp <2 x i8> [[TMP21]] to <2 x float>
+; CHECK-NEXT: [[TMP23:%.*]] = fsub fast <2 x float> [[TMP22]], [[TMP4]]
+; CHECK-NEXT: [[TMP24:%.*]] = fmul fast <2 x float> [[TMP23]], [[TMP13]]
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x float> [[TMP24]], i32 0
+; CHECK-NEXT: [[VAL52:%.*]] = fadd fast float [[VAL50]], [[TMP25]]
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[TMP24]], i32 1
+; CHECK-NEXT: [[VAL54:%.*]] = fadd fast float [[VAL52]], [[TMP26]]
+; CHECK-NEXT: [[VAL55]] = tail call fast float @llvm.minnum.f32(float [[VAL31]], float [[ARG1:%.*]])
+; CHECK-NEXT: [[VAL56:%.*]] = tail call fast float @llvm.maxnum.f32(float [[ARG2:%.*]], float [[VAL54]])
+; CHECK-NEXT: call void @ham(float [[VAL55]], float [[VAL56]])
+; CHECK-NEXT: [[TMP27]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT: br i1 [[ARG8:%.*]], label [[BB30]], label [[BB57]]
+; CHECK: bb57:
+; CHECK-NEXT: ret i64 0
+;
+bb: ; entry block: scalar fmul/fadd values that bb18 and the bb30 loop consume, directly or through the phis below
+ %val = fmul fast float 0.000000e+00, 0.000000e+00
+ %val9 = fmul fast float 0.000000e+00, %arg
+ %val10 = fmul fast float %arg3, 1.000000e+00
+ %val11 = fmul fast float %arg3, 1.000000e+00
+ %val12 = fadd fast float %arg3, 1.000000e+00
+ %val13 = fadd fast float %val12, 2.000000e+00
+ %val14 = fadd fast float 0.000000e+00, 0.000000e+00
+ %val15 = fadd fast float %val14, 1.000000e+00
+ %val16 = fadd fast float %arg3, 1.000000e+00
+ %val17 = fadd fast float %arg3, 1.000000e+00
+ br i1 %arg6, label %bb18, label %bb57
+
+bb18: ; preds = %bb
+ %val19 = phi float [ %val13, %bb ] ; single-predecessor phi: forwards %val13 from bb (same pattern for the next three phis)
+ %val20 = phi float [ %val15, %bb ]
+ %val21 = phi float [ %val16, %bb ]
+ %val22 = phi float [ %val17, %bb ]
+ %val23 = fmul fast float %val16, 2.000000e+00 ; no users in this function; the CHECK lines show its operand coming from an extractelement
+ %val24 = fmul fast float %val17, 3.000000e+00 ; no users in this function; the CHECK lines show its operand coming from an extractelement
+ br i1 %arg7, label %bb25, label %bb57
+
+bb25: ; preds = %bb18
+ %val26 = phi float [ %val19, %bb18 ] ; bb25 only re-forwards the four bb18 phis, then branches to the loop
+ %val27 = phi float [ %val20, %bb18 ]
+ %val28 = phi float [ %val21, %bb18 ]
+ %val29 = phi float [ %val22, %bb18 ]
+ br label %bb30
+
+bb30: ; preds = %bb30, %bb25
+ %val31 = phi float [ %val55, %bb30 ], [ 0.000000e+00, %bb25 ] ; loop-carried: %val55 from the previous iteration, 0.0 on entry from bb25
+ %val32 = phi float [ %val9, %bb30 ], [ 0.000000e+00, %bb25 ] ; no users; its back-edge input %val9 appears as an extractelement (TMP27) in the CHECK lines
+ %val33 = load i8, ptr %arg5, align 1 ; the four i8 values at %arg5 + 0..3 are each loaded and converted with uitofp
+ %val34 = uitofp i8 %val33 to float
+ %val35 = getelementptr inbounds i8, ptr %arg5, i64 1
+ %val36 = load i8, ptr %val35, align 1
+ %val37 = uitofp i8 %val36 to float
+ %val38 = getelementptr inbounds i8, ptr %arg5, i64 2
+ %val39 = load i8, ptr %val38, align 1
+ %val40 = uitofp i8 %val39 to float
+ %val41 = getelementptr inbounds i8, ptr %arg5, i64 3
+ %val42 = load i8, ptr %val41, align 1
+ %val43 = uitofp i8 %val42 to float
+ %val44 = fsub fast float %val34, %val ; subtract the entry-block fmul results (%val, %val9, %val10, %val11), one per loaded byte
+ %val45 = fsub fast float %val37, %val9
+ %val46 = fsub fast float %val40, %val10
+ %val47 = fsub fast float %val43, %val11
+ %val48 = fmul fast float %val44, %val26 ; multiply each difference by the matching value forwarded through bb18/bb25 (%val26..%val29)
+ %val49 = fmul fast float %val45, %val27
+ %val50 = fadd fast float %val49, %val48 ; running sum of the four products, finishing in %val54
+ %val51 = fmul fast float %val46, %val28
+ %val52 = fadd fast float %val50, %val51
+ %val53 = fmul fast float %val47, %val29
+ %val54 = fadd fast float %val52, %val53
+ %val55 = tail call fast float @llvm.minnum.f32(float %val31, float %arg1)
+ %val56 = tail call fast float @llvm.maxnum.f32(float %arg2, float %val54) ; the only use of %val54
+ call void @ham(float %val55, float %val56)
+ br i1 %arg8, label %bb30, label %bb57 ; loop repeats while %arg8 is true
+
+bb57: ; preds = %bb30, %bb18, %bb
+ ret i64 0 ; every path through the function returns the constant 0
+}
+
+declare float @llvm.maxnum.f32(float, float) ; called once in bb30 to produce %val56
+declare float @llvm.minnum.f32(float, float) ; called once in bb30 to produce the loop-carried %val55
+declare void @ham(float, float) ; external function with no body here; called in bb30 with %val55 and %val56
More information about the llvm-commits
mailing list