[PATCH] D25344: Add a fast path to alignTo.

Thu Oct 6 14:58:32 PDT 2016

The attached patch passes all tests. I will benchmark to see if it
makes any difference.

I also noticed a missing optimization. It would be nice if we could
keep a single function but have the optimizer take care of it, so I
tried

uint64_t foo(uint64_t Value, uint64_t Align) {
  return alignToNonP2(Value, 1 << Align);
}

but it still produces

define i64 @_Z3foomm(i64 %Value, i64 %Align) local_unnamed_addr #0 {
entry:
  %sh_prom = trunc i64 %Align to i32
  %shl = shl i32 1, %sh_prom
  %conv = sext i32 %shl to i64
  %add.i = add i64 %Value, -1
  %sub.i = add i64 %add.i, %conv
  %div.i = urem i64 %sub.i, %conv
  %add2.i = sub i64 %sub.i, %div.i
  ret i64 %add2.i
}

Changing 1 to 1ULL does cause us to optimize it:

define i64 @_Z3foomm(i64 %Value, i64 %Align) local_unnamed_addr #0 {
entry:
  %shl = shl i64 1, %Align
  %add.i = add i64 %Value, -1
  %sub.i = add i64 %add.i, %shl
  %.not = sub i64 0, %shl
  %add2.i = and i64 %sub.i, %.not
  ret i64 %add2.i
}


Cheers,
Rafael

On 6 October 2016 at 17:00, Rafael Espíndola <rafael.espindola at gmail.com> wrote:
> On 6 October 2016 at 16:39, Davide Italiano <dccitaliano at gmail.com> wrote:
>> On Thu, Oct 6, 2016 at 1:37 PM, Rui Ueyama <ruiu at google.com> wrote:
>>> Or to make alignTo accept only powers of two and fix code that passes
>>> non-powers-of-two.
>>>
>>
>> Do you know how many of these cases are in LLVM and if they are legitimate?
>
> Interesting idea. I added an assert and I am running the tests.
>
> Cheers,
> Rafael
-------------- next part --------------

diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index fdf7f27..f8a10e1 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -670,6 +670,14 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
 /// \endcode
 inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
   assert(Align != 0u && "Align can't be 0.");
+  assert(isPowerOf2_64(Align) && "Alignment is not a power of two!");
+  Skew &= Align - 1;
+  return ((Value + Align - 1 - Skew) & -Align) + Skew;
+}
+
+inline uint64_t alignToNonP2(uint64_t Value, uint64_t Align,
+                             uint64_t Skew = 0) {
+  assert(Align != 0u && "Align can't be 0.");
   Skew %= Align;
   return (Value + Align - 1 - Skew) / Align * Align + Skew;
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5a4d49d..261553c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3058,7 +3058,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
       }
     }
 
-    unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+    unsigned PaddedMaskNumElts = alignToNonP2(MaskNumElts, SrcNumElts);
     unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
     EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                     PaddedMaskNumElts);
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index c0da255..0e99ab4 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -279,7 +279,7 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
 
   unsigned RequiredExtraInstrs;
   if (ExtraRange)
-    RequiredExtraInstrs = alignTo(RangeAfterCopy, ExtraRange) / ExtraRange;
+    RequiredExtraInstrs = alignToNonP2(RangeAfterCopy, ExtraRange) / ExtraRange;
   else if (RangeAfterCopy > 0)
     // We need an extra instruction but none is available
     RequiredExtraInstrs = 1000000;
diff --git a/unittests/Support/MathExtrasTest.cpp b/unittests/Support/MathExtrasTest.cpp
index d373030..763a431 100644
--- a/unittests/Support/MathExtrasTest.cpp
+++ b/unittests/Support/MathExtrasTest.cpp
@@ -221,7 +221,7 @@ TEST(MathExtras, alignTo) {
   EXPECT_EQ(7u, alignTo(5, 8, 7));
   EXPECT_EQ(17u, alignTo(17, 8, 1));
   EXPECT_EQ(3u, alignTo(~0LL, 8, 3));
-  EXPECT_EQ(552u, alignTo(321, 255, 42));
+  EXPECT_EQ(552u, alignToNonP2(321, 255, 42));
 }
 
 template<typename T>