[llvm] r268921 - [VectorUtils] Query number of sign bits to allow more truncations

Mon May 9 07:32:32 PDT 2016

Author: jamesm
Date: Mon May  9 09:32:30 2016
New Revision: 268921

URL: http://llvm.org/viewvc/llvm-project?rev=268921&view=rev
Log:
[VectorUtils] Query number of sign bits to allow more truncations

When deciding whether a vector calculation can be done in a smaller bitwidth, combine the demanded-bits result with sign-bit information from ValueTracking: high bits that are merely copies of the sign bit do not need to be preserved, which allows more truncations.

Modified:
    llvm/trunk/lib/Analysis/VectorUtils.cpp
    llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll

Modified: llvm/trunk/lib/Analysis/VectorUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/VectorUtils.cpp?rev=268921&r1=268920&r2=268921&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/VectorUtils.cpp (original)
+++ llvm/trunk/lib/Analysis/VectorUtils.cpp Mon May  9 09:32:30 2016
@@ -320,6 +320,9 @@ llvm::computeMinimumValueSizes(ArrayRef<
   SmallPtrSet<Instruction *, 4> InstructionSet;
   MapVector<Instruction *, uint64_t> MinBWs;
 
+  assert(Blocks.size() > 0 && "Must have at least one block!");
+  const DataLayout &DL = Blocks[0]->getModule()->getDataLayout();
+  
   // Determine the roots. We work bottom-up, from truncs or icmps.
   bool SeenExtFromIllegalType = false;
   for (auto *BB : Blocks)
@@ -363,12 +366,19 @@ llvm::computeMinimumValueSizes(ArrayRef<
 
     // If we encounter a type that is larger than 64 bits, we can't represent
     // it so bail out.
-    if (DB.getDemandedBits(I).getBitWidth() > 64)
+    APInt NeededBits = DB.getDemandedBits(I);
+    unsigned BW = NeededBits.getBitWidth();
+    if (BW > 64)
       return MapVector<Instruction *, uint64_t>();
 
-    uint64_t V = DB.getDemandedBits(I).getZExtValue();
-    DBits[Leader] |= V;
-    DBits[I] = V;
+    auto NSB = ComputeNumSignBits(I, DL);
+
+    // Query demanded bits for the bits required by the instruction. Remove
+    // any bits that are equal to the sign bit, because we can truncate the
+    // instruction without changing their value.
+    NeededBits &= APInt::getLowBitsSet(BW, BW - NSB);
+    DBits[Leader] |= NeededBits.getZExtValue();
+    DBits[I] |= NeededBits.getZExtValue();
 
     // Casts, loads and instructions outside of our range terminate a chain
     // successfully.

Modified: llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll?rev=268921&r1=268920&r2=268921&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll Mon May  9 09:32:30 2016
@@ -263,5 +263,41 @@ for.body:
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
+; CHECK-LABEL: @add_g
+; CHECK: load <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: icmp ult <16 x i8>
+; CHECK: select <16 x i1> {{.*}}, <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture
+%r, i8 %arg1, i32 %len) #0 {
+  %1 = icmp sgt i32 %len, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %2 = sext i8 %arg1 to i64
+  br label %3
+
+._crit_edge:                                      ; preds = %3, %0
+  ret void
+
+; <label>:3                                       ; preds = %3, %.lr.ph
+  %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
+  %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+  %x5 = load i8, i8* %x4
+  %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+  %x8 = load i8, i8* %x7
+  %x9 = zext i8 %x5 to i32
+  %x10 = xor i32 %x9, 255
+  %x11 = icmp ult i32 %x10, 24
+  %x12 = select i1 %x11, i32 %x10, i32 24
+  %x13 = trunc i32 %x12 to i8
+  store i8 %x13, i8* %x4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %len
+  br i1 %exitcond, label %._crit_edge, label %3
+}
+
 attributes #0 = { nounwind }