[llvm] r227197 - [InstCombine] Teach how to fold a select into a cttz/ctlz with the 'is_zero_undef' flag.

Andrea Di Biagio Andrea_DiBiagio at sn.scee.net
Tue Jan 27 07:58:15 PST 2015


Author: adibiagio
Date: Tue Jan 27 09:58:14 2015
New Revision: 227197

URL: http://llvm.org/viewvc/llvm-project?rev=227197&view=rev
Log:
[InstCombine] Teach how to fold a select into a cttz/ctlz with the 'is_zero_undef' flag.

This patch teaches the Instruction Combiner how to fold a cttz/ctlz followed by
a icmp plus select into a single cttz/ctlz with flag 'is_zero_undef' cleared.

Added test InstCombine/select-cmp-cttz-ctlz.ll.

Added:
    llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/intrinsics.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=227197&r1=227196&r2=227197&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Tue Jan 27 09:58:14 2015
@@ -437,6 +437,66 @@ static Value *foldSelectICmpAndOr(const
   return Builder->CreateOr(V, Y);
 }
 
+/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
+/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
+///
+/// For example, we can fold the following code sequence:
+/// \code
+///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+///   %1 = icmp ne i32 %x, 0
+///   %2 = select i1 %1, i32 %0, i32 32
+/// \code
+/// 
+/// into:
+///   %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
+                                  InstCombiner::BuilderTy *Builder) {
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+  Value *CmpLHS = ICI->getOperand(0);
+  Value *CmpRHS = ICI->getOperand(1);
+
+  // Check if the condition value compares a value for equality against zero.
+  if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
+    return nullptr;
+
+  Value *Count = FalseVal;
+  Value *ValueOnZero = TrueVal;
+  if (Pred == ICmpInst::ICMP_NE)
+    std::swap(Count, ValueOnZero);
+
+  // Skip zero extend/truncate.
+  Value *V = nullptr;
+  if (match(Count, m_ZExt(m_Value(V))) ||
+      match(Count, m_Trunc(m_Value(V))))
+    Count = V;
+
+  // Check if the value propagated on zero is a constant number equal to the
+  // sizeof in bits of 'Count'.
+  unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
+  if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
+    return nullptr;
+
+  // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+  // input to the cttz/ctlz is used as LHS for the compare instruction.
+  if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
+      match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
+    IntrinsicInst *II = cast<IntrinsicInst>(Count);
+    IRBuilder<> Builder(II);
+    if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) {
+      // Explicitly clear the 'undef_on_zero' flag.
+      IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
+      Type *Ty = NewI->getArgOperand(1)->getType();
+      NewI->setArgOperand(1, Constant::getNullValue(Ty));
+      Builder.Insert(NewI);
+      Count = NewI;
+    }
+
+    return Builder.CreateZExtOrTrunc(Count, ValueOnZero->getType());
+  }
+
+  return nullptr;
+}
+
 /// visitSelectInstWithICmp - Visit a SelectInst that has an
 /// ICmpInst as its first operand.
 ///
@@ -665,6 +725,9 @@ Instruction *InstCombiner::visitSelectIn
   if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
     return ReplaceInstUsesWith(SI, V);
 
+  if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
+    return ReplaceInstUsesWith(SI, V);
+
   return Changed ? &SI : nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/intrinsics.ll?rev=227197&r1=227196&r2=227197&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/intrinsics.ll Tue Jan 27 09:58:14 2015
@@ -349,7 +349,8 @@ define i32 @ctlz_select(i32 %Value) noun
   ret i32 %s
 
 ; CHECK-LABEL: @ctlz_select(
-; CHECK: select i1 %tobool, i32 %ctlz, i32 32
+; CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 %Value, i1 false)
+; CHECK-NEXT: ret i32
 }
 
 define i32 @cttz_select(i32 %Value) nounwind {
@@ -359,5 +360,6 @@ define i32 @cttz_select(i32 %Value) noun
   ret i32 %s
 
 ; CHECK-LABEL: @cttz_select(
-; CHECK: select i1 %tobool, i32 %cttz, i32 32
+; CHECK-NEXT: call i32 @llvm.cttz.i32(i32 %Value, i1 false)
+; CHECK-NEXT: ret i32
 }

Added: llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll?rev=227197&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll Tue Jan 27 09:58:14 2015
@@ -0,0 +1,300 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; This test is to verify that the instruction combiner is able to fold
+; a cttz/ctlz followed by a icmp + select into a single cttz/ctlz with
+; the 'is_zero_undef' flag cleared.
+
+define i16 @test1(i16 %x) {
+; CHECK-LABEL: @test1(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i16 %0, i16 16
+  ret i16 %cond
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: @test2(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i32 %0, i32 32
+  ret i32 %cond
+}
+
+define i64 @test3(i64 %x) {
+; CHECK-LABEL: @test3(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i64 %0, i64 64
+  ret i64 %cond
+}
+
+define i16 @test4(i16 %x) {
+; CHECK-LABEL: @test4(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %tobool = icmp eq i16 %x, 0
+  %cond = select i1 %tobool, i16 16, i16 %0
+  ret i16 %cond
+}
+
+define i32 @test5(i32 %x) {
+; CHECK-LABEL: @test5(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %tobool = icmp eq i32 %x, 0
+  %cond = select i1 %tobool, i32 32, i32 %0
+  ret i32 %cond
+}
+
+define i64 @test6(i64 %x) {
+; CHECK-LABEL: @test6(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %tobool = icmp eq i64 %x, 0
+  %cond = select i1 %tobool, i64 64, i64 %0
+  ret i64 %cond
+}
+
+define i16 @test1b(i16 %x) {
+; CHECK-LABEL: @test1b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i16 %0, i16 16
+  ret i16 %cond
+}
+
+define i32 @test2b(i32 %x) {
+; CHECK-LABEL: @test2b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i32 %0, i32 32
+  ret i32 %cond
+}
+
+define i64 @test3b(i64 %x) {
+; CHECK-LABEL: @test3b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i64 %0, i64 64
+  ret i64 %cond
+}
+
+define i16 @test4b(i16 %x) {
+; CHECK-LABEL: @test4b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: ret i16 [[VAR]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %tobool = icmp eq i16 %x, 0
+  %cond = select i1 %tobool, i16 16, i16 %0
+  ret i16 %cond
+}
+
+define i32 @test5b(i32 %x) {
+; CHECK-LABEL: @test5b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: ret i32 [[VAR]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %tobool = icmp eq i32 %x, 0
+  %cond = select i1 %tobool, i32 32, i32 %0
+  ret i32 %cond
+}
+
+define i64 @test6b(i64 %x) {
+; CHECK-LABEL: @test6b(
+; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: ret i64 [[VAR]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %tobool = icmp eq i64 %x, 0
+  %cond = select i1 %tobool, i64 64, i64 %0
+  ret i64 %cond
+}
+
+define i32 @test1c(i16 %x) {
+; CHECK-LABEL: @test1c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %cast2 = zext i16 %0 to i32
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i32 %cast2, i32 16
+  ret i32 %cond
+}
+
+define i64 @test2c(i16 %x) {
+; CHECK-LABEL: @test2c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true)
+  %conv = zext i16 %0 to i64
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i64 %conv, i64 16
+  ret i64 %cond
+}
+
+define i64 @test3c(i32 %x) {
+; CHECK-LABEL: @test3c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %conv = zext i32 %0 to i64
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i64 %conv, i64 32
+  ret i64 %cond
+}
+
+define i32 @test4c(i16 %x) {
+; CHECK-LABEL: @test4c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %cast = zext i16 %0 to i32
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i32 %cast, i32 16
+  ret i32 %cond
+}
+
+define i64 @test5c(i16 %x) {
+; CHECK-LABEL: @test5c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true)
+  %cast = zext i16 %0 to i64
+  %tobool = icmp ne i16 %x, 0
+  %cond = select i1 %tobool, i64 %cast, i64 16
+  ret i64 %cond
+}
+
+define i64 @test6c(i32 %x) {
+; CHECK-LABEL: @test6c(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64
+; CHECK-NEXT: ret i64 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = zext i32 %0 to i64
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i64 %cast, i64 32
+  ret i64 %cond
+}
+
+define i16 @test1d(i64 %x) {
+; CHECK-LABEL: @test1d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %conv = trunc i64 %0 to i16
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i16 %conv, i16 64
+  ret i16 %cond
+}
+
+define i32 @test2d(i64 %x) {
+; CHECK-LABEL: @test2d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i32 %cast, i32 64
+  ret i32 %cond
+}
+
+define i16 @test3d(i32 %x) {
+; CHECK-LABEL: @test3d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i16 %cast, i16 32
+  ret i16 %cond
+}
+
+define i16 @test4d(i64 %x) {
+; CHECK-LABEL: @test4d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i16 %cast, i16 64
+  ret i16 %cond
+}
+
+define i32 @test5d(i64 %x) {
+; CHECK-LABEL: @test5d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32
+; CHECK-NEXT: ret i32 [[VAR2]]
+entry:
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  %tobool = icmp ne i64 %x, 0
+  %cond = select i1 %tobool, i32 %cast, i32 64
+  ret i32 %cond
+}
+
+define i16 @test6d(i32 %x) {
+; CHECK-LABEL: @test6d(
+; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16
+; CHECK-NEXT: ret i16 [[VAR2]]
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  %tobool = icmp ne i32 %x, 0
+  %cond = select i1 %tobool, i16 %cast, i16 32
+  ret i16 %cond
+}
+
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)





More information about the llvm-commits mailing list