[llvm] r228829 - [TTI] Improved cost heuristic for cttz/ctlz calls.

Andrea Di Biagio Andrea_DiBiagio at sn.scee.net
Wed Feb 11 06:22:18 PST 2015


Author: adibiagio
Date: Wed Feb 11 08:22:18 2015
New Revision: 228829

URL: http://llvm.org/viewvc/llvm-project?rev=228829&view=rev
Log:
[TTI] Improved cost heuristic for cttz/ctlz calls.

This patch is a follow-up to r228826 (see code-review: D7506).

Now that SimplifyCFG uses TargetTransformInfo for cost analysis, we 
have to fix the cost heuristic for intrinsic calls to cttz/ctlz.

This patch defines the method 'getIntrinsicCost' in BasicTTIImpl, so that
BasicTTIImpl now queries TLI to check whether a call to cttz/ctlz is cheap for
the target.

Added test cases in Transforms/SimplifyCFG/X86 to verify that on x86,
SimplifyCFG only speculates a call to cttz/ctlz if it is cheap.

Differential Revision: http://reviews.llvm.org/D7554

Added:
    llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/trunk/test/Transforms/SimplifyCFG/SpeculativeExec.ll

Modified: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h?rev=228829&r1=228828&r2=228829&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h (original)
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h Wed Feb 11 08:22:18 2015
@@ -150,6 +150,28 @@ public:
     return getTLI()->isTypeLegal(VT);
   }
 
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<const Value *> Arguments) {
+    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
+  }
+
+  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<Type *> ParamTys) {
+    if (IID == Intrinsic::cttz) {
+      if (getTLI()->isCheapToSpeculateCttz())
+        return TargetTransformInfo::TCC_Basic;
+      return TargetTransformInfo::TCC_Expensive;
+    }
+
+    if (IID == Intrinsic::ctlz) {
+       if (getTLI()->isCheapToSpeculateCtlz())
+        return TargetTransformInfo::TCC_Basic;
+      return TargetTransformInfo::TCC_Expensive;
+    }
+
+    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
+  }
+
   unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
 
   unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }

Modified: llvm/trunk/test/Transforms/SimplifyCFG/SpeculativeExec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/SpeculativeExec.ll?rev=228829&r1=228828&r2=228829&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/SpeculativeExec.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyCFG/SpeculativeExec.ll Wed Feb 11 08:22:18 2015
@@ -28,22 +28,6 @@ bb3:		; preds = %bb2, %entry
 	ret i32 %tmp5
 }
 
-declare i8 @llvm.cttz.i8(i8, i1)
-
-define i8 @test2(i8 %a) {
-; CHECK-LABEL: @test2(
-  br i1 undef, label %bb_true, label %bb_false
-bb_true:
-  %b = tail call i8 @llvm.cttz.i8(i8 %a, i1 false)
-  br label %join
-bb_false:
-  br label %join
-join:
-  %c = phi i8 [%b, %bb_true], [%a, %bb_false]
-; CHECK: select
-  ret i8 %c
-}
-
 define i8* @test4(i1* %dummy, i8* %a, i8* %b) {
 ; Test that we don't speculate an arbitrarily large number of unfolded constant
 ; expressions.

Added: llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll?rev=228829&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll (added)
+++ llvm/trunk/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll Wed Feb 11 08:22:18 2015
@@ -0,0 +1,141 @@
+; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=ALL --check-prefix=BMI
+; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+lzcnt < %s | FileCheck %s --check-prefix=ALL --check-prefix=LZCNT
+; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
+
+
+define i64 @test1(i64 %A) {
+; ALL-LABEL: @test1(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; LZCNT-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test2(i32 %A) {
+; ALL-LABEL: @test2(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; LZCNT-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3(i16 signext %A) {
+; ALL-LABEL: @test3(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; LZCNT-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
+; BMI-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1b(i64 %A) {
+; ALL-LABEL: @test1b(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; BMI-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2b(i32 %A) {
+; ALL-LABEL: @test2b(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; BMI-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3b(i16 signext %A) {
+; ALL-LABEL: @test3b(
+; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
+; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; BMI-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
+; LZCNT-NOT: select
+; GENERIC-NOT: select
+; ALL: ret
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)





More information about the llvm-commits mailing list