[llvm] r301806 - [SelectionDAG] Use known ones to provide a better bound for the known zeros for CTTZ/CTLZ operations.

Mon May 1 09:08:06 PDT 2017

Author: ctopper
Date: Mon May  1 11:08:06 2017
New Revision: 301806

URL: http://llvm.org/viewvc/llvm-project?rev=301806&view=rev
Log:
[SelectionDAG] Use known ones to provide a better bound for the known zeros for CTTZ/CTLZ operations.

This is the SelectionDAG version of D32521. If we know where at least one 1 is located in the input to these intrinsics, we can place an upper bound on the number of bits needed to represent the count and thus increase the number of known zeros in the output.

I think we can also refine this further for CTTZ_ZERO_UNDEF/CTLZ_ZERO_UNDEF by assuming that the answer will never be BitWidth. I've left this out for now because it caused other test failures across multiple targets, usually because ADD was turned into OR based on this new information.

I'll fix CTPOP in a future patch.

Differential Revision: https://reviews.llvm.org/D32692

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/test/CodeGen/X86/clz.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=301806&r1=301805&r2=301806&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon May  1 11:08:06 2017
@@ -2353,9 +2353,23 @@ void SelectionDAG::computeKnownBits(SDVa
     break;
   }
   case ISD::CTTZ:
-  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTTZ_ZERO_UNDEF: {
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    // If we have a known 1, its position is our upper bound.
+    unsigned PossibleTZ = Known2.One.countTrailingZeros();
+    unsigned LowBits = Log2_32(PossibleTZ) + 1;
+    Known.Zero.setBitsFrom(LowBits);
+    break;
+  }
   case ISD::CTLZ:
-  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTLZ_ZERO_UNDEF: {
+    computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
+    // If we have a known 1, its position is our upper bound.
+    unsigned PossibleLZ = Known2.One.countLeadingZeros();
+    unsigned LowBits = Log2_32(PossibleLZ) + 1;
+    Known.Zero.setBitsFrom(LowBits);
+    break;
+  }
   case ISD::CTPOP: {
     Known.Zero.setBitsFrom(Log2_32(BitWidth)+1);
     break;

Modified: llvm/trunk/test/CodeGen/X86/clz.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clz.ll?rev=301806&r1=301805&r2=301806&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clz.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clz.ll Mon May  1 11:08:06 2017
@@ -786,7 +786,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X32-NEXT:    orb $2, %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    bsfl %eax, %eax
-; X32-NEXT:    andb $1, %al
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -795,7 +794,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X64-NEXT:    orb $2, %dil
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    bsfl %eax, %eax
-; X64-NEXT:    andb $1, %al
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
 ;
@@ -805,7 +803,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X32-CLZ-NEXT:    orb $2, %al
 ; X32-CLZ-NEXT:    movzbl %al, %eax
 ; X32-CLZ-NEXT:    tzcntl %eax, %eax
-; X32-CLZ-NEXT:    andb $1, %al
 ; X32-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-CLZ-NEXT:    retl
 ;
@@ -814,7 +811,6 @@ define i8 @cttz_i8_knownbits(i8 %x)  {
 ; X64-CLZ-NEXT:    orb $2, %dil
 ; X64-CLZ-NEXT:    movzbl %dil, %eax
 ; X64-CLZ-NEXT:    tzcntl %eax, %eax
-; X64-CLZ-NEXT:    andb $1, %al
 ; X64-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-CLZ-NEXT:    retq
   %x2 = or i8 %x, 2
@@ -830,8 +826,7 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X32-NEXT:    orb $64, %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    bsrl %eax, %eax
-; X32-NEXT:    notl %eax
-; X32-NEXT:    andb $1, %al
+; X32-NEXT:    xorl $7, %eax
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -840,8 +835,7 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X64-NEXT:    orb $64, %dil
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    bsrl %eax, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    andb $1, %al
+; X64-NEXT:    xorl $7, %eax
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
 ;
@@ -852,7 +846,6 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X32-CLZ-NEXT:    movzbl %al, %eax
 ; X32-CLZ-NEXT:    lzcntl %eax, %eax
 ; X32-CLZ-NEXT:    addl $-24, %eax
-; X32-CLZ-NEXT:    andb $1, %al
 ; X32-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-CLZ-NEXT:    retl
 ;
@@ -862,7 +855,6 @@ define i8 @ctlz_i8_knownbits(i8 %x)  {
 ; X64-CLZ-NEXT:    movzbl %dil, %eax
 ; X64-CLZ-NEXT:    lzcntl %eax, %eax
 ; X64-CLZ-NEXT:    addl $-24, %eax
-; X64-CLZ-NEXT:    andb $1, %al
 ; X64-CLZ-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-CLZ-NEXT:    retq