[llvm] r299329 - [BypassSlowDivision] Do not bypass division of hash-like values

Nikolai Bozhenov via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 2 06:14:30 PDT 2017


Author: n.bozhenov
Date: Sun Apr  2 08:14:30 2017
New Revision: 299329

URL: http://llvm.org/viewvc/llvm-project?rev=299329&view=rev
Log:
[BypassSlowDivision] Do not bypass division of hash-like values

Disable bypassing if one of the operands looks like a hash value. Slow
division often occurs in hashtable implementations and fast division is
never taken there because a hash value is extremely unlikely to have
enough upper bits set to zero.

A value is considered to be hash-like if it is produced by

1) XOR operation
2) Multiplication by a constant wider than the shorter type
3) PHI node with all incoming values being hash-like

Differential Revision: https://reviews.llvm.org/D28200

Modified:
    llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp
    llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll

Modified: llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp?rev=299329&r1=299328&r2=299329&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp Sun Apr  2 08:14:30 2017
@@ -17,6 +17,7 @@
 
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -81,17 +82,18 @@ namespace llvm {
 
   typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
   typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
+  typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
 }
 
 namespace {
 enum ValueRange {
   /// Operand definitely fits into BypassType. No runtime checks are needed.
-  VALRNG_SHORT,
+  VALRNG_KNOWN_SHORT,
   /// A runtime check is required, as value range is unknown.
   VALRNG_UNKNOWN,
   /// Operand is unlikely to fit into BypassType. The bypassing should be
   /// disabled.
-  VALRNG_LONG
+  VALRNG_LIKELY_LONG
 };
 
 class FastDivInsertionTask {
@@ -100,7 +102,8 @@ class FastDivInsertionTask {
   IntegerType *BypassType = nullptr;
   BasicBlock *MainBB = nullptr;
 
-  ValueRange getValueRange(Value *Op);
+  bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
+  ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
   QuotRemWithBB createSlowBB(BasicBlock *Successor);
   QuotRemWithBB createFastBB(BasicBlock *Successor);
   QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
@@ -187,8 +190,65 @@ Value *FastDivInsertionTask::getReplacem
   return isDivisionOp() ? Value.Quotient : Value.Remainder;
 }
 
+/// \brief Check if a value looks like a hash.
+///
+/// The routine is expected to detect values computed using the most common hash
+/// algorithms. Typically, hash computations end with one of the following
+/// instructions:
+///
+/// 1) MUL with a constant wider than BypassType
+/// 2) XOR instruction
+///
+/// And even if we are wrong and the value is not a hash, it is still quite
+/// unlikely that such values will fit into BypassType.
+///
+/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
+/// It is implemented as a depth-first search for values that look neither long
+/// nor hash-like.
+bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+
+  switch (I->getOpcode()) {
+  case Instruction::Xor:
+    return true;
+  case Instruction::Mul: {
+    // After Constant Hoisting pass, long constants may be represented as
+    // bitcast instructions. As a result, some constants may look like an
+    // instruction at first, and an additional check is necessary to find out if
+    // an operand is actually a constant.
+    Value *Op1 = I->getOperand(1);
+    ConstantInt *C = dyn_cast<ConstantInt>(Op1);
+    if (!C && isa<BitCastInst>(Op1))
+      C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
+    return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
+  }
+  case Instruction::PHI: {
+    // Stop IR traversal in case of a crazy input code. This limits recursion
+    // depth.
+    if (Visited.size() >= 16)
+      return false;
+    // Do not visit nodes that have been visited already. We return true because
+    // it means that we couldn't find any value that doesn't look hash-like.
+    if (Visited.find(I) != Visited.end())
+      return true;
+    Visited.insert(I);
+    return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
+      // Ignore undef values as they probably don't affect the division
+      // operands.
+      return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
+             isa<UndefValue>(V);
+    });
+  }
+  default:
+    return false;
+  }
+}
+
 /// Check if an integer value fits into our bypass type.
-ValueRange FastDivInsertionTask::getValueRange(Value *V) {
+ValueRange FastDivInsertionTask::getValueRange(Value *V,
+                                               VisitedSetTy &Visited) {
   unsigned ShortLen = BypassType->getBitWidth();
   unsigned LongLen = V->getType()->getIntegerBitWidth();
 
@@ -201,10 +261,17 @@ ValueRange FastDivInsertionTask::getValu
   computeKnownBits(V, Zeros, Ones, DL);
 
   if (Zeros.countLeadingOnes() >= HiBits)
-    return VALRNG_SHORT;
+    return VALRNG_KNOWN_SHORT;
 
   if (Ones.countLeadingZeros() < HiBits)
-    return VALRNG_LONG;
+    return VALRNG_LIKELY_LONG;
+
+  // Long integer divisions are often used in hashtable implementations. It's
+  // not worth bypassing such divisions because hash values are extremely
+  // unlikely to have enough leading zeros. The call below tries to detect
+  // values that are unlikely to fit BypassType (including hashes).
+  if (isHashLikeValue(V, Visited))
+    return VALRNG_LIKELY_LONG;
 
   return VALRNG_UNKNOWN;
 }
@@ -308,16 +375,18 @@ Optional<QuotRemPair> FastDivInsertionTa
     return None;
   }
 
-  ValueRange DividendRange = getValueRange(Dividend);
-  if (DividendRange == VALRNG_LONG)
+  VisitedSetTy SetL;
+  ValueRange DividendRange = getValueRange(Dividend, SetL);
+  if (DividendRange == VALRNG_LIKELY_LONG)
     return None;
 
-  ValueRange DivisorRange = getValueRange(Divisor);
-  if (DivisorRange == VALRNG_LONG)
+  VisitedSetTy SetR;
+  ValueRange DivisorRange = getValueRange(Divisor, SetR);
+  if (DivisorRange == VALRNG_LIKELY_LONG)
     return None;
 
-  bool DividendShort = (DividendRange == VALRNG_SHORT);
-  bool DivisorShort = (DivisorRange == VALRNG_SHORT);
+  bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
+  bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
 
   if (DividendShort && DivisorShort) {
     // If both operands are known to be short then just replace the long

Modified: llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll?rev=299329&r1=299328&r2=299329&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll Sun Apr  2 08:14:30 2017
@@ -93,3 +93,124 @@ define void @Test_special_case(i32 %a, i
   store i64 %res, i64* %retptr
   ret void
 }
+
+
+; Do not bypass a division if one of the operands looks like a hash value.
+define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_xor(
+; CHECK-NEXT:    [[C:%.*]] = xor i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = udiv i64 [[C]], [[L:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %c = xor i64 %a, %b
+  %res = udiv i64 %c, %l
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_phi_xor(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[B:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[MERGE:%.*]], label [[XORPATH:%.*]]
+; CHECK:       xorpath:
+; CHECK-NEXT:    [[C:%.*]] = xor i64 [[A:%.*]], [[B]]
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[E:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[C]], [[XORPATH]] ]
+; CHECK-NEXT:    [[RES:%.*]] = sdiv i64 [[E]], [[L:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp eq i64 %b, 0
+  br i1 %cmp, label %merge, label %xorpath
+
+xorpath:
+  %c = xor i64 %a, %b
+  br label %merge
+
+merge:
+  %e = phi i64 [ undef, %entry ], [ %c, %xorpath ]
+  %res = sdiv i64 %e, %l
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_mul_long_const(
+; CHECK-NEXT:    [[C:%.*]] = mul i64 [[A:%.*]], 5229553307
+; CHECK-NEXT:    [[RES:%.*]] = urem i64 [[C]], [[L:%.*]]
+; CHECK-NEXT:    store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %c = mul i64 %a, 5229553307 ; the constant doesn't fit 32 bits
+  %res = urem i64 %c, %l
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_bypass_phi_mul_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_MUL:%.*]] = mul nsw i64 [[A:%.*]], 34806414968801
+; CHECK-NEXT:    [[P:%.*]] = icmp sgt i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    br i1 [[P]], label [[BRANCH:%.*]], label [[MERGE:%.*]]
+; CHECK:       branch:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[LHS:%.*]] = phi i64 [ 42, [[BRANCH]] ], [ [[A_MUL]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[LHS]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
+; CHECK:         [[TMP4:%.*]] = trunc i64 [[B]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[LHS]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    br label [[TMP10:%.*]]
+; CHECK:         [[TMP9:%.*]] = sdiv i64 [[LHS]], [[B]]
+; CHECK-NEXT:    br label [[TMP10]]
+; CHECK:         [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
+; CHECK-NEXT:    store i64 [[TMP11]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a.mul = mul nsw i64 %a, 34806414968801
+  %p = icmp sgt i64 %a, %b
+  br i1 %p, label %branch, label %merge
+
+branch:
+  br label %merge
+
+merge:
+  %lhs = phi i64 [ 42, %branch ], [ %a.mul, %entry ]
+  %res = sdiv i64 %lhs, %b
+  store i64 %res, i64* %retptr
+  ret void
+}
+
+define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_bypass_mul_short_const(
+; CHECK-NEXT:    [[C:%.*]] = mul i64 [[A:%.*]], -42
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[C]], [[L:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]]
+; CHECK:         [[TMP5:%.*]] = trunc i64 [[L]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[C]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = urem i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT:    br label [[TMP11:%.*]]
+; CHECK:         [[TMP10:%.*]] = urem i64 [[C]], [[L]]
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:         [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT:    store i64 [[TMP12]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT:    ret void
+;
+  %c = mul i64 %a, -42
+  %res = urem i64 %c, %l
+  store i64 %res, i64* %retptr
+  ret void
+}




More information about the llvm-commits mailing list