[PATCH] D57779: [SLP] Add support for throttling.

Fri Feb 12 05:24:45 PST 2021

ABataev added a comment.

In D57779#2559601 <https://reviews.llvm.org/D57779#2559601>, @dtemirbulatov wrote:

> Here is another example:
> source_filename = "psspread.c"
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define dso_local void @spread_q_poisson() local_unnamed_addr #0 {
> entry:
>
>   %div.i = fdiv float undef, undef
>   %conv13.i = fdiv float 1.000000e+00, %div.i
>   %conv18.i = fdiv float 1.000000e+00, undef
>   %conv23.i = fdiv float 1.000000e+00, undef
>   %conv162 = fptosi float undef to i32
>   %0 = load float, float* undef, align 4
>   %1 = load i32, i32* undef, align 4
>   %add187.us = add nsw i32 %1, %conv162
>   %add191.us = add nsw i32 undef, undef
>   %add195.us = add nsw i32 undef, undef
>   %conv196.us = sitofp i32 %add187.us to float
>   %mul197.us = fmul float %conv13.i, %conv196.us
>   %sub198.us = fsub float undef, %mul197.us
>   %mul.i363.us = fmul float %sub198.us, %sub198.us
>   %conv200.us = sitofp i32 %add191.us to float
>   %mul201.us = fmul float %conv18.i, %conv200.us
>   %sub202.us = fsub float undef, %mul201.us
>   %mul.i362.us = fmul float %sub202.us, %sub202.us
>   %conv204.us = sitofp i32 %add195.us to float
>   %mul205.us = fmul float %conv23.i, %conv204.us
>   %sub206.us = fsub float %0, %mul205.us
>   %mul.i.us = fmul float %sub206.us, %sub206.us
>   %add208.us = fadd float %mul.i363.us, %mul.i362.us
>   %add209.us = fadd float %add208.us, %mul.i.us
>   %cmp210.us = fcmp olt float %add209.us, undef
>   %add230.us = add nsw i32 undef, %add195.us
>   unreachable
>
> }
>
> attributes #0 = { "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = !{!"clang version 13.0.0 (/home/dtemirbulatov/llvm/llvm-project-thl/llvm/tools/clang eec04092d67b94f47439a9065b6bd4cd60165be2 <https://reviews.llvm.org/rGeec04092d67b94f47439a9065b6bd4cd60165be2>)"}
>
> With the proposed change it produces:
> ; ModuleID = 'bug.ll'
> source_filename = "psspread.c"
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define dso_local void @spread_q_poisson() local_unnamed_addr #0 {
> entry:
>
>   %div.i = fdiv float undef, undef
>   %conv13.i = fdiv float 1.000000e+00, %div.i
>   %conv162 = fptosi float undef to i32
>   %0 = load float, float* undef, align 4
>   %1 = load i32, i32* undef, align 4
>   %add187.us = add nsw i32 %1, %conv162
>   %conv196.us = sitofp i32 %add187.us to float
>   %mul197.us = fmul float %conv13.i, %conv196.us
>   %sub198.us = fsub float undef, %mul197.us
>   %mul.i363.us = fmul float %sub198.us, %sub198.us
>   %2 = insertelement <2 x float> <float undef, float poison>, float %0, i32 1
>   %3 = fsub <2 x float> %2, <float 0x7FF8000000000000, float 0x7FF8000000000000>
>   %4 = fmul <2 x float> %3, %3
>   %5 = extractelement <2 x float> %4, i32 0
>   %add208.us = fadd float %mul.i363.us, %5
>   %6 = extractelement <2 x float> %4, i32 1
>   %add209.us = fadd float %add208.us, %6
>   %cmp210.us = fcmp olt float %add209.us, undef
>   %add230.us = add nsw i32 undef, undef
>   unreachable
>
> }
>
> attributes #0 = { "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = !{!"clang version 13.0.0 (/home/dtemirbulatov/llvm/llvm-project-thl/llvm/tools/clang eec04092d67b94f47439a9065b6bd4cd60165be2 <https://reviews.llvm.org/rGeec04092d67b94f47439a9065b6bd4cd60165be2>)"}
>
> but if we immediately decide to vectorize partially, we get this output:
> ; ModuleID = 'bug.ll'
> source_filename = "psspread.c"
> target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
>
> define dso_local void @spread_q_poisson() local_unnamed_addr #0 {
> entry:
>
>   %div.i = fdiv float undef, undef
>   %conv18.i = fdiv float 1.000000e+00, undef
>   %0 = insertelement <2 x float> poison, float %div.i, i32 0
>   %1 = insertelement <2 x float> %0, float undef, i32 1
>   %2 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
>   %conv162 = fptosi float undef to i32
>   %3 = load float, float* undef, align 4
>   %4 = load i32, i32* undef, align 4
>   %add187.us = add nsw i32 %4, %conv162
>   %add191.us = add nsw i32 undef, undef
>   %add195.us = add nsw i32 undef, undef
>   %conv200.us = sitofp i32 %add191.us to float
>   %mul201.us = fmul float %conv18.i, %conv200.us
>   %sub202.us = fsub float undef, %mul201.us
>   %mul.i362.us = fmul float %sub202.us, %sub202.us
>   %5 = insertelement <2 x i32> poison, i32 %add187.us, i32 0
>   %6 = insertelement <2 x i32> %5, i32 %add195.us, i32 1
>   %7 = sitofp <2 x i32> %6 to <2 x float>
>   %8 = fmul <2 x float> %2, %7
>   %9 = insertelement <2 x float> <float undef, float poison>, float %3, i32 1
>   %10 = fsub <2 x float> %9, %8
>   %11 = fmul <2 x float> %10, %10
>   %12 = extractelement <2 x float> %11, i32 0
>   %add208.us = fadd float %12, %mul.i362.us
>   %13 = extractelement <2 x float> %11, i32 1
>   %add209.us = fadd float %add208.us, %13
>   %cmp210.us = fcmp olt float %add209.us, undef
>   %add230.us = add nsw i32 undef, %add195.us
>   unreachable
>
> }
>
> attributes #0 = { "use-soft-float"="false" }
>
> !llvm.ident = !{!0}
>
> !0 = !{!"clang version 13.0.0 (/home/dtemirbulatov/llvm/llvm-project-thl/llvm/tools/clang eec04092d67b94f47439a9065b6bd4cd60165be2 <https://reviews.llvm.org/rGeec04092d67b94f47439a9065b6bd4cd60165be2>)"}

It looks to me like immediate vectorization is better, since it vectorizes more, no? Also, there is a problem here that looks like it is caused by the multinode analysis. I'm trying to improve this in my non-power-2 patch and will prepare a separate patch for it.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D57779/new/

https://reviews.llvm.org/D57779