[llvm] [LV] Use frozen start value for FindLastIV if needed. (PR #132691)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 03:34:08 PDT 2025


lukel97 wrote:

Makes sense to me. Although I think AnyOf reductions might have the same problem too?


With `opt -p loop-vectorize -force-vector-width=4`:

```llvm
; Scalar reproducer: a loop over the i32 elements of %v that carries a
; select-based recurrence (%rdx) seeded with %a and returns its final value.
define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
entry:
  br label %loop

loop:                                      ; preds = %entry, %loop
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Recurrence phi: starts at %a. This is the only use of %a in this function.
  %rdx = phi i32 [ %a, %entry ], [ %sel, %loop ]
  %gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv
  %load.v.iv = load i32, ptr %gep.v.iv, align 4
  %cmp.load.iv.3 = icmp eq i32 %load.v.iv, 3
  ; Keep the running value when the loaded element equals 3; otherwise take %b.
  %sel = select i1 %cmp.load.iv.3, i32 %rdx, i32 %b
  %iv.next = add nuw nsw i64 %iv, 1
  ; Leave the loop once the incremented induction variable reaches %n.
  %exit.cond = icmp eq i64 %iv.next, %n
  br i1 %exit.cond, label %exit, label %loop

exit:                                     ; preds = %loop
  ret i32 %sel
}
```

This becomes:

```llvm
; Vectorized form of the loop: a 4-lane vector loop followed by the original
; scalar loop, which handles the remaining iterations.
define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
entry:
  ; Skip the vector loop entirely when %n is (unsigned) less than 4.
  %min.iters.check = icmp ult i64 %n, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph:                                        ; preds = %entry
  ; %n.vec = %n minus its remainder mod 4, i.e. the vector loop's trip bound.
  %n.mod.vf = urem i64 %n, 4
  %n.vec = sub i64 %n, %n.mod.vf
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; Per-lane i1 accumulator, all-false on entry; note that %a does not appear
  ; anywhere inside the vector loop.
  %vec.phi = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ %5, %vector.body ]
  %0 = add i64 %index, 0
  %1 = getelementptr inbounds i32, ptr %v, i64 %0
  %2 = getelementptr inbounds i32, ptr %1, i32 0
  %wide.load = load <4 x i32>, ptr %2, align 4
  %3 = icmp eq <4 x i32> %wide.load, splat (i32 3)
  ; %4 inverts the compare (lane is true where the element is NOT 3), and %5
  ; ORs that into the accumulator.
  %4 = xor <4 x i1> %3, splat (i1 true)
  %5 = or <4 x i1> %vec.phi, %4
  %index.next = add nuw i64 %index, 4
  %6 = icmp eq i64 %index.next, %n.vec
  br i1 %6, label %middle.block, label %vector.body, !llvm.loop !0

middle.block:                                     ; preds = %vector.body
  ; Collapse the lane flags into a single i1 (true if any lane was set).
  %7 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %5)
  %8 = freeze i1 %7
  ; First use of %a: the reduction result is %b if any flag was set, else %a.
  %rdx.select = select i1 %8, i32 %b, i32 %a
  ; If %n.vec equals %n there are no leftover iterations; go straight to exit.
  %cmp.n = icmp eq i64 %n, %n.vec
  br i1 %cmp.n, label %exit, label %scalar.ph

scalar.ph:                                        ; preds = %entry, %middle.block
  ; Resume values for the scalar loop: continue after the vector loop, or
  ; start from scratch when arriving directly from %entry.
  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ]
  ; Second use of %a: the scalar loop's start value when coming from %entry.
  %bc.merge.rdx = phi i32 [ %rdx.select, %middle.block ], [ %a, %entry ]
  br label %loop

loop:                                             ; preds = %scalar.ph, %loop
  ; Original scalar loop body, now seeded from the scalar.ph resume values.
  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %sel, %loop ]
  %gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv
  %load.v.iv = load i32, ptr %gep.v.iv, align 4
  %cmp.load.iv.3 = icmp eq i32 %load.v.iv, 3
  %sel = select i1 %cmp.load.iv.3, i32 %rdx, i32 %b
  %iv.next = add nuw nsw i64 %iv, 1
  %exit.cond = icmp eq i64 %iv.next, %n
  br i1 %exit.cond, label %exit, label %loop, !llvm.loop !3

exit:                                             ; preds = %middle.block, %loop
  ; Merge the result from whichever path reached the exit.
  %sel.lcssa = phi i32 [ %sel, %loop ], [ %rdx.select, %middle.block ]
  ret i32 %sel.lcssa
}
```

We now have two uses of `%a`: one in `%rdx.select` and one in `%bc.merge.rdx`.

https://github.com/llvm/llvm-project/pull/132691


More information about the llvm-commits mailing list