[PATCH] D93617: [LoopVectorize] Take argument's size into consideration when computing leader's demanded bits

Thu Dec 31 08:00:39 PST 2020

nikic added inline comments.

================
Comment at: llvm/lib/Analysis/VectorUtils.cpp:539
+    if (!isa<Instruction>(Val)) {
+      if (!isa<Argument>(Val))
+        continue;
----------------
I think we can't entirely ignore non-arguments (aka constants) here. Take a look at this modified form of `llvm/test/Transforms/LoopVectorize/avoid-truncate-icmp-operands.ll`:

```
; RUN: opt -loop-vectorize -S < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

@a = dso_local local_unnamed_addr global i64 0, align 8
@b = dso_local local_unnamed_addr global i16 0, align 4

define dso_local void @myFunc(i32 %d, i32 %e) {
; CHECK:        pred.store.continue2:
; CHECK-NEXT:      %{{[0-9]+}} = icmp ult <2 x i64> %broadcast.splat{{[0-9]*}}, %broadcast.splat{{[0-9]*}}
for.body29.lr.ph:
  br label %for.body29

for.cond25.for.cond.cleanup28_crit_edge:          ; preds = %for.inc
  ret void

for.body29:                                       ; preds = %for.inc, %for.body29.lr.ph
  %n.078 = phi i16 [ undef, %for.body29.lr.ph ], [ %add34, %for.inc ]
  br i1 undef, label %for.inc, label %if.then

if.then:                                          ; preds = %for.body29
  %conv31 = zext i8 undef to i64
  store i64 %conv31, i64* @a, align 8
  %cmp.i = icmp ult i32 %e, %d
  %.sroa.speculated = select i1 %cmp.i, i64 10000000000, i64 20000000000
  %conv32 = trunc i64 %.sroa.speculated to i16
  store i16 %conv32, i16* @b, align 4
  br label %for.inc

for.inc:                                          ; preds = %if.then, %for.body29
  %add34 = add nsw i16 %n.078, 2
  %cmp27 = icmp slt i16 %add34, 16
  br i1 %cmp27, label %for.body29, label %for.cond25.for.cond.cleanup28_crit_edge, !llvm.loop !6
}

!6 = distinct !{!6, !7}
!7 = !{!"llvm.loop.vectorize.enable", i1 true}
```

This gets vectorized with i32 width, but the two select constants are larger than 32-bit and get truncated.

I think the logic here should be: For `ConstantInt`, take the `getActiveBits()` of the constant value, for everything else (including Arguments) take the getTypeSizeInBits (what you're effectively doing now).

================
Comment at: llvm/lib/Analysis/VectorUtils.cpp:541
+        continue;
+      uint64_t BitSize = Val->getType()->getScalarSizeInBits();
+      if (BitSize == 0)
----------------
You should use `DL.getTypeSizeInBits(Val->getType()` here, which will remove the need for the `== 0` check.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93617/new/

https://reviews.llvm.org/D93617