[PATCH] D113442: [InstCombine] Enable fold select into operand for FAdd, FMul, and FSub.

Mon Nov 8 16:37:54 PST 2021

huihuiz added a comment.

Take the attached test.ll (its contents are reproduced at the end of this comment).

Run: opt  -polly-process-unprofitable  -polly-remarks-minimal  -polly-use-llvm-names    -polly-codegen-verify  -analyze -polly-scops test.ll
You will then see that the reduction is not detected:
MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]

[X] -> { Stmt_for_body[i0] -> MemRef_sum_014_reg2mem[0] };

After enabling the fold of select into its operand for FAdd, the reduction is detected.
Run (with this patch applied): opt -S -instcombine test.ll -o test2.ll
opt  -polly-process-unprofitable  -polly-remarks-minimal  -polly-use-llvm-names    -polly-codegen-verify  -analyze -polly-scops test2.ll

MustWriteAccess :=  [Reduction Type: +] [Scalar: 0]

[X] -> { Stmt_for_body[i0] -> MemRef_sum_014_reg2mem[0] };

  ; Reproducer for Polly reduction detection: a conditional floating-point sum
  ; over %a[0..%n) whose accumulator is kept in a stack slot (loaded and stored
  ; on every iteration) instead of a PHI node.
  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
  target triple = "aarch64-none-linux-gnu"

  ; Returns the sum of the elements of %a[0..%n) that compare greater than 0.0
  ; (fast-math fcmp/fadd), or 0.0 when %n <= 0.
  define float @test(i32 %n, float* noalias nocapture readonly %a) {
  entry:
    ; Stack slots: the running sum used inside the loop, and the final result
    ; that is read back in %for.end.
    %sum.014.reg2mem = alloca float, align 4
    %sum.0.lcssa.reg2mem = alloca float, align 4
    br label %entry.split15

  entry.split15:                                    ; preds = %entry
    br label %entry.split

  entry.split:                                      ; preds = %entry.split15
    ; Guard: only enter the loop when %n > 0. The result slot is pre-set to 0.0
    ; so the skip path returns zero.
    %cmp12 = icmp sgt i32 %n, 0
    store float 0.000000e+00, float* %sum.0.lcssa.reg2mem, align 4
    br i1 %cmp12, label %for.body.preheader, label %for.end

  for.body.preheader:                               ; preds = %entry.split
    ; Widen the trip count to match the i64 induction variable and zero the
    ; running sum before the first iteration.
    %wide.trip.count = zext i32 %n to i64
    store float 0.000000e+00, float* %sum.014.reg2mem, align 4
    br label %for.body

  for.body:                                         ; preds = %for.body.preheader, %for.body
    %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
    ; Reload the running sum and fetch a[i].
    %sum.014.reload = load float, float* %sum.014.reg2mem, align 4
    %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
    %0 = load float, float* %arrayidx, align 4
    ; Conditional accumulate: the new sum is (a[i] + sum) when a[i] > 0.0,
    ; otherwise the old sum is kept unchanged. This "select between the fadd
    ; and its own operand" shape is the pattern the surrounding comment is about.
    %cmp1 = fcmp fast ogt float %0, 0.000000e+00
    %add = fadd fast float %0, %sum.014.reload
    %sum.1 = select i1 %cmp1, float %add, float %sum.014.reload
    %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
    ; Write the (possibly unchanged) sum back to its slot on every iteration.
    store float %sum.1, float* %sum.014.reg2mem, align 4
    br i1 %exitcond.not, label %for.end.loopexit, label %for.body

  for.end.loopexit:                                 ; preds = %for.body
    ; Copy the final running sum into the result slot.
    %1 = load float, float* %sum.014.reg2mem, align 4
    store float %1, float* %sum.0.lcssa.reg2mem, align 4
    br label %for.end

  for.end:                                          ; preds = %for.end.loopexit, %entry.split
    ; Result is either the loop's final sum or the 0.0 stored in %entry.split.
    %sum.0.lcssa.reload = load float, float* %sum.0.lcssa.reg2mem, align 4
    ret float %sum.0.lcssa.reload
  }

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113442/new/

https://reviews.llvm.org/D113442