[PATCH] D80870: [LV] Make sure smallest/widest type sizes are powers-of-2.

Sat May 30 08:27:44 PDT 2020

fhahn created this revision.
fhahn added reviewers: Ayal, gilr, rengolin.
Herald added subscribers: rkruppe, hiraditya.
Herald added a project: LLVM.

LV currently only supports power of 2 vectorization factors, which has
been made explicit with the assertion added in
840450549c9199150cbdee29acef756c19660ca1 <https://reviews.llvm.org/rG840450549c9199150cbdee29acef756c19660ca1>.

However, if the widest type is not a power-of-2 the computed maxVF won't
be a power-of-2 either. This patch changes getSmallestAndWidestTypes to
round up to the next power-of-2. This can happen in practice for
x86_fp80, for example. Alternatively, we could force the computed max VF
to the next-lowest power-of-2.

Fixes PR46139.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80870

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll


Index: llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll
===================================================================

--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/fp80-widest-type.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-vectorize -S %s -mattr=+avx512f | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.15.0"
+
+; Make sure non-power-of-2 types are rounded up to the next power of 2.
+
+define x86_fp80 @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  foo.exit:
+; CHECK-NEXT:    br label [[FOR_BODY3_I_3:%.*]]
+; CHECK:       for.body3.i.3:
+; CHECK-NEXT:    [[N_ADDR_112_I_3:%.*]] = phi i64 [ [[DEC_I_3:%.*]], [[FOR_BODY3_I_3]] ], [ 24, [[FOO_EXIT:%.*]] ]
+; CHECK-NEXT:    [[X_ADDR_111_I_3:%.*]] = phi x86_fp80 [ [[MUL_I_3:%.*]], [[FOR_BODY3_I_3]] ], [ undef, [[FOO_EXIT]] ]
+; CHECK-NEXT:    [[MUL_I_3]] = fmul x86_fp80 [[X_ADDR_111_I_3]], 0xK40008000000000000000
+; CHECK-NEXT:    [[DEC_I_3]] = add nsw i64 [[N_ADDR_112_I_3]], -1
+; CHECK-NEXT:    [[CMP2_I_3:%.*]] = icmp sgt i64 [[N_ADDR_112_I_3]], 1
+; CHECK-NEXT:    br i1 [[CMP2_I_3]], label [[FOR_BODY3_I_3]], label [[FOO_EXIT_3:%.*]]
+; CHECK:       foo.exit.3:
+; CHECK-NEXT:    [[MUL_LCSSA:%.*]] = phi x86_fp80 [ [[MUL_I_3]], [[FOR_BODY3_I_3]] ]
+; CHECK-NEXT:    ret x86_fp80 [[MUL_LCSSA]]
+;
+foo.exit:
+  br label %for.body3.i.3
+
+for.body3.i.3:                                    ; preds = %for.body3.i.3, %foo.exit
+  %n.addr.112.i.3 = phi i64 [ %dec.i.3, %for.body3.i.3 ], [ 24, %foo.exit ]
+  %x.addr.111.i.3 = phi x86_fp80 [ %mul.i.3, %for.body3.i.3 ], [ undef, %foo.exit ]
+  %mul.i.3 = fmul x86_fp80 %x.addr.111.i.3, 0xK40008000000000000000
+  %dec.i.3 = add nsw i64 %n.addr.112.i.3, -1
+  %cmp2.i.3 = icmp sgt i64 %n.addr.112.i.3, 1
+  br i1 %cmp2.i.3, label %for.body3.i.3, label %foo.exit.3
+
+foo.exit.3:                                       ; preds = %for.body3.i.3
+  %mul.lcssa = phi x86_fp80 [ %mul.i.3, %for.body3.i.3 ]
+  ret x86_fp80 %mul.lcssa
+}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -998,7 +998,8 @@
 
   /// \return The size (in bits) of the smallest and widest types in the code
   /// that needs to be vectorized. We ignore values that remain scalar such as
-  /// 64 bit loop indices.
+  /// 64 bit loop indices. Non-power of 2 sizes are rounded up to the next power
+  /// of 2.
   std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
 
   /// \return The desired interleave count.
@@ -5225,7 +5226,8 @@
     }
   }
 
-  return {MinWidth, MaxWidth};
+  // Round up to the next power of 2, if min or max widths aren't powers of 2.
+  return {PowerOf2Ceil(MinWidth), PowerOf2Ceil(MaxWidth)};
 }
 
 unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D80870.267459.patch
Type: text/x-patch
Size: 3057 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200530/f7e73281/attachment.bin>