[PATCH] D57779: [SLP] Add support for throttling.
Alexey Bataev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 05:24:45 PST 2021
ABataev added a comment.
In D57779#2559601 <https://reviews.llvm.org/D57779#2559601>, @dtemirbulatov wrote:
> Here is another example:
> source_filename = "psspread.c"
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define dso_local void @spread_q_poisson() local_unnamed_addr #0 {
> entry:
>
> %div.i = fdiv float undef, undef
> %conv13.i = fdiv float 1.000000e+00, %div.i
> %conv18.i = fdiv float 1.000000e+00, undef
> %conv23.i = fdiv float 1.000000e+00, undef
> %conv162 = fptosi float undef to i32
> %0 = load float, float* undef, align 4
> %1 = load i32, i32* undef, align 4
> %add187.us = add nsw i32 %1, %conv162
> %add191.us = add nsw i32 undef, undef
> %add195.us = add nsw i32 undef, undef
> %conv196.us = sitofp i32 %add187.us to float
> %mul197.us = fmul float %conv13.i, %conv196.us
> %sub198.us = fsub float undef, %mul197.us
> %mul.i363.us = fmul float %sub198.us, %sub198.us
> %conv200.us = sitofp i32 %add191.us to float
> %mul201.us = fmul float %conv18.i, %conv200.us
> %sub202.us = fsub float undef, %mul201.us
> %mul.i362.us = fmul float %sub202.us, %sub202.us
> %conv204.us = sitofp i32 %add195.us to float
> %mul205.us = fmul float %conv23.i, %conv204.us
> %sub206.us = fsub float %0, %mul205.us
> %mul.i.us = fmul float %sub206.us, %sub206.us
> %add208.us = fadd float %mul.i363.us, %mul.i362.us
> %add209.us = fadd float %add208.us, %mul.i.us
> %cmp210.us = fcmp olt float %add209.us, undef
> %add230.us = add nsw i32 undef, %add195.us
> unreachable
>
> }
>
> attributes #0 = { "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = !{!"clang version 13.0.0 (/home/dtemirbulatov/llvm/llvm-project-thl/llvm/tools/clang eec04092d67b94f47439a9065b6bd4cd60165be2 <https://reviews.llvm.org/rGeec04092d67b94f47439a9065b6bd4cd60165be2>)"}
>
> With the proposed change, it produces:
> ; ModuleID = 'bug.ll'
> source_filename = "psspread.c"
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define dso_local void @spread_q_poisson() local_unnamed_addr #0 {
> entry:
>
> %div.i = fdiv float undef, undef
> %conv13.i = fdiv float 1.000000e+00, %div.i
> %conv162 = fptosi float undef to i32
> %0 = load float, float* undef, align 4
> %1 = load i32, i32* undef, align 4
> %add187.us = add nsw i32 %1, %conv162
> %conv196.us = sitofp i32 %add187.us to float
> %mul197.us = fmul float %conv13.i, %conv196.us
> %sub198.us = fsub float undef, %mul197.us
> %mul.i363.us = fmul float %sub198.us, %sub198.us
> %2 = insertelement <2 x float> <float undef, float poison>, float %0, i32 1
> %3 = fsub <2 x float> %2, <float 0x7FF8000000000000, float 0x7FF8000000000000>
> %4 = fmul <2 x float> %3, %3
> %5 = extractelement <2 x float> %4, i32 0
> %add208.us = fadd float %mul.i363.us, %5
> %6 = extractelement <2 x float> %4, i32 1
> %add209.us = fadd float %add208.us, %6
> %cmp210.us = fcmp olt float %add209.us, undef
> %add230.us = add nsw i32 undef, undef
> unreachable
>
> }
>
> attributes #0 = { "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = !{!"clang version 13.0.0 (/home/dtemirbulatov/llvm/llvm-project-thl/llvm/tools/clang eec04092d67b94f47439a9065b6bd4cd60165be2 <https://reviews.llvm.org/rGeec04092d67b94f47439a9065b6bd4cd60165be2>)"}
>
> but if we immediately decide to vectorize partially, we get this output:
> ; ModuleID = 'bug.ll'
> source_filename = "psspread.c"
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define dso_local void @spread_q_poisson() local_unnamed_addr #0 {
> entry:
>
> %div.i = fdiv float undef, undef
> %conv18.i = fdiv float 1.000000e+00, undef
> %0 = insertelement <2 x float> poison, float %div.i, i32 0
> %1 = insertelement <2 x float> %0, float undef, i32 1
> %2 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
> %conv162 = fptosi float undef to i32
> %3 = load float, float* undef, align 4
> %4 = load i32, i32* undef, align 4
> %add187.us = add nsw i32 %4, %conv162
> %add191.us = add nsw i32 undef, undef
> %add195.us = add nsw i32 undef, undef
> %conv200.us = sitofp i32 %add191.us to float
> %mul201.us = fmul float %conv18.i, %conv200.us
> %sub202.us = fsub float undef, %mul201.us
> %mul.i362.us = fmul float %sub202.us, %sub202.us
> %5 = insertelement <2 x i32> poison, i32 %add187.us, i32 0
> %6 = insertelement <2 x i32> %5, i32 %add195.us, i32 1
> %7 = sitofp <2 x i32> %6 to <2 x float>
> %8 = fmul <2 x float> %2, %7
> %9 = insertelement <2 x float> <float undef, float poison>, float %3, i32 1
> %10 = fsub <2 x float> %9, %8
> %11 = fmul <2 x float> %10, %10
> %12 = extractelement <2 x float> %11, i32 0
> %add208.us = fadd float %12, %mul.i362.us
> %13 = extractelement <2 x float> %11, i32 1
> %add209.us = fadd float %add208.us, %13
> %cmp210.us = fcmp olt float %add209.us, undef
> %add230.us = add nsw i32 undef, %add195.us
> unreachable
>
> }
>
> attributes #0 = { "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = !{!"clang version 13.0.0 (/home/dtemirbulatov/llvm/llvm-project-thl/llvm/tools/clang eec04092d67b94f47439a9065b6bd4cd60165be2 <https://reviews.llvm.org/rGeec04092d67b94f47439a9065b6bd4cd60165be2>)"}
I see that immediate vectorization is better, as it vectorizes more, no? Also, there is a problem here that looks like it is caused by the multinode analysis. I'm trying to improve this in my non-power-of-2 patch and will prepare a separate patch for it.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D57779/new/
https://reviews.llvm.org/D57779
More information about the llvm-commits
mailing list