[llvm] r225274 - [CodeGenPrepare] Improved logic to speculate calls to cttz/ctlz.

Andrea Di Biagio Andrea_DiBiagio at sn.scee.net
Tue Jan 6 09:41:19 PST 2015


Author: adibiagio
Date: Tue Jan  6 11:41:18 2015
New Revision: 225274

URL: http://llvm.org/viewvc/llvm-project?rev=225274&view=rev
Log:
[CodeGenPrepare] Improved logic to speculate calls to cttz/ctlz.

This patch improves the logic added at revision 224899 (see review D6728) that
teaches the backend when it is profitable to speculate calls to cttz/ctlz.

The original algorithm conservatively avoided speculating more than one
instruction from a basic block in a control flow grap modelling an if-statement.
In particular, the only allowed instruction (excluding the terminator) was a
call to cttz/ctlz. However, there are cases where we could be less conservative
and still be able to speculate a call to cttz/ctlz.

With this patch, CodeGenPrepare now tries to speculate a cttz/ctlz if the
result is zero extended/truncated in the same basic block, and the zext/trunc
instruction is "free" for the target.

Added new test cases to CodeGen/X86/cttz-ctlz.ll

Differential Revision: http://reviews.llvm.org/D6853

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=225274&r1=225273&r2=225274&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Tue Jan  6 11:41:18 2015
@@ -4008,15 +4008,41 @@ static bool OptimizeBranchInst(BranchIns
   // See if ThenBB contains only one instruction (excluding the
   // terminator and DbgInfoIntrinsic calls).
   IntrinsicInst *II = nullptr;
+  CastInst *CI = nullptr;
   for (BasicBlock::iterator I = ThenBB->begin(),
                             E = std::prev(ThenBB->end()); I != E; ++I) {
     // Skip debug info.
     if (isa<DbgInfoIntrinsic>(I))
       continue;
 
-    if (II)
-      // Avoid speculating more than one instruction.
-      return false;
+    // Check if this is a zero extension or a truncate of a previously
+    // matched call to intrinsic cttz/ctlz.
+    if (II) {
+      // Early exit if we already found a "free" zero extend/truncate.
+      if (CI)
+        return false;
+
+      Type *SrcTy = II->getType();
+      Type *DestTy = I->getType();
+      Value *V;
+ 
+      if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
+        // Speculate this zero extend only if it is "free" for the target.
+        if (TLI.isZExtFree(SrcTy, DestTy)) {
+          CI = cast<CastInst>(I);
+          continue;
+        }
+      } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
+        // Speculate this truncate only if it is "free" for the target.
+        if (TLI.isTruncateFree(SrcTy, DestTy)) {
+          CI = cast<CastInst>(I);
+          continue;
+        }
+      } else {
+        // Avoid speculating more than one instruction.
+        return false;
+      }
+    }
 
     // See if this is a call to intrinsic cttz/ctlz.
     if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
@@ -4041,11 +4067,14 @@ static bool OptimizeBranchInst(BranchIns
     Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
     Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
 
-    if (!OrigV || ThenV != II)
+    if (!OrigV)
       return false;
 
+    if (ThenV != II && (!CI || ThenV != CI))
+      return false;
+    
     if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
-      unsigned BitWidth = ThenV->getType()->getIntegerBitWidth();
+      unsigned BitWidth = II->getType()->getIntegerBitWidth();
 
       // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
       // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
@@ -4070,7 +4099,7 @@ static bool OptimizeBranchInst(BranchIns
                           ConstantInt::getFalse(II->getContext()) };
         Module *M = EntryBB->getParent()->getParent();
         Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
-        IRBuilder<> Builder(BrInst);
+        IRBuilder<> Builder(II);
         Instruction *NewI = Builder.CreateCall(IF, Args);
 
         // Replace the old call to cttz/ctlz.

Modified: llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll?rev=225274&r1=225273&r2=225274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll Tue Jan  6 11:41:18 2015
@@ -241,6 +241,178 @@ cond.end:
   ret i16 %cond
 }
 
+; The following tests verify that calls to cttz/ctlz are speculated even if
+; basic block %cond.true has an extra zero extend/truncate which is "free"
+; for the target.
+
+define i64 @test1e(i32 %x) {
+; ALL-LABEL: @test1e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %phitmp2 = zext i32 %0 to i64
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test2e(i64 %x) {
+; ALL-LABEL: @test2e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i32 %cond
+}
+
+define i64 @test3e(i32 %x) {
+; ALL-LABEL: @test3e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %phitmp2 = zext i32 %0 to i64
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test4e(i64 %x) {
+; ALL-LABEL: @test4e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i32 %cond
+}
+
+define i16 @test5e(i64 %x) {
+; ALL-LABEL: @test5e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i16 %cond
+}
+
+define i16 @test6e(i32 %x) {
+; ALL-LABEL: @test6e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+  ret i16 %cond
+}
+
+define i16 @test7e(i64 %x) {
+; ALL-LABEL: @test7e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i16 %cond
+}
+
+define i16 @test8e(i32 %x) {
+; ALL-LABEL: @test8e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+  ret i16 %cond
+}
+
 
 declare i64 @llvm.ctlz.i64(i64, i1)
 declare i32 @llvm.ctlz.i32(i32, i1)





More information about the llvm-commits mailing list