[llvm] [LoopVectorizer] Add support for partial reductions (PR #92418)

Thu Dec 19 13:54:02 PST 2024

fhahn wrote:

It looks like this started triggering an assertion when building llvm-test-suite on ARM64 macOS.

I managed to reduce the failure to the IR below.

Running `opt -p loop-vectorize` triggers: `Assertion failed: ((VF.isScalar() || V->getType()->isVectorTy()) && "scalar values must be stored as (0, 0)"), function set, file VPlan.h, line 284.`

```
        target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
        target triple = "arm64-apple-macosx15.0.0"

        ; Reduced reproducer: a single-block loop that accumulates, into an i64
        ; sum, the product of two values sign-extended from i8 to i64.
        ; NOTE(review): presumably this sext/mul/add chain is the pattern the
        ; partial-reduction support matches -- confirm against the PR.
        define void @test(i64 %idx.neg, i8 %0) #0 {
        entry:
          br label %while.body

        while.body:                                       ; preds = %while.body, %entry
          ; Two i64 counters (starting at %idx.neg and 0) and the i64 accumulator
          ; (starting at 0).
          %n1ptr.0.idx131 = phi i64 [ %n1ptr.0.add, %while.body ], [ %idx.neg, %entry ]
          %n2ptr.0.idx130 = phi i64 [ %n2ptr.0.add, %while.body ], [ 0, %entry ]
          %sum.1129 = phi i64 [ %add99, %while.body ], [ 0, %entry ]
          %n1ptr.0.add = add i64 %n1ptr.0.idx131, 1
          ; Sign-extension of the i8 function argument; its operand is not
          ; modified anywhere in the loop.
          %conv = sext i8 %0 to i64
          %n2ptr.0.add = add i64 %n2ptr.0.idx130, 1
          ; i8 load from the null pointer (presumably an artifact of test-case
          ; reduction), sign-extended to i64.
          %1 = load i8, ptr null, align 1
          %conv97 = sext i8 %1 to i64
          ; sum = sext(load) * sext(%0) + sum
          %mul = mul i64 %conv97, %conv
          %add99 = add i64 %mul, %sum.1129
          ; Stay in the loop while the first counter is unsigned-greater than 0
          ; and the second counter is not -1.
          %cmp94 = icmp ugt i64 %n1ptr.0.idx131, 0
          %cmp95 = icmp ne i64 %n2ptr.0.idx130, -1
          %2 = and i1 %cmp94, %cmp95
          br i1 %2, label %while.body, label %while.end.loopexit

        while.end.loopexit:                               ; preds = %while.body
          ; Exit-block phi of the final sum; it has no users in this function,
          ; which returns void.
          %add99.lcssa = phi i64 [ %add99, %while.body ]
          ret void
        }

        attributes #0 = { "target-cpu"="apple-m1" }
```

I reverted the patch for now to get things back to green. Fingers crossed it is just a minor issue and can be recommitted tomorrow.

https://github.com/llvm/llvm-project/pull/92418