[llvm] r343498 - [X86] Improve test instruction shrinking when the sign flag is used and the output of the and is truncated

Mon Oct 1 10:10:45 PDT 2018

Author: ctopper
Date: Mon Oct  1 10:10:45 2018
New Revision: 343498

URL: http://llvm.org/viewvc/llvm-project?rev=343498&view=rev
Log:
[X86] Improve test instruction shrinking when the sign flag is used and the output of the and is truncated

Currently we skip looking through truncates if the sign flag is used. But that's overly restrictive.

It's safe to look through the truncate as long as we ensure one of three things when we shrink: the MSB of the mask at the shrunken size isn't set, the shrunken size is equal to the original compare size, or the sign flag is unused.

There are still missed opportunities to shrink a load and fold it in here. This will be fixed in a future patch.

Differential Revision: https://reviews.llvm.org/D52669

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
    llvm/trunk/test/CodeGen/X86/test-shrink.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=343498&r1=343497&r2=343498&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Oct  1 10:10:45 2018
@@ -3392,8 +3392,11 @@ void X86DAGToDAGISel::Select(SDNode *Nod
     SDValue N0 = Node->getOperand(0);
     SDValue N1 = Node->getOperand(1);
 
-    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
-        hasNoSignedComparisonUses(Node))
+    // Save the original VT of the compare.
+    MVT CmpVT = N0.getSimpleValueType();
+
+    // We can peek through truncates, but we need to be careful below.
+    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
       N0 = N0.getOperand(0);
 
     // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -3411,14 +3414,21 @@ void X86DAGToDAGISel::Select(SDNode *Nod
       int SubRegOp;
       unsigned Op;
 
+      // For each of these checks we need to be careful if the sign flag is
+      // being used. It is only safe to use the sign flag in two conditions,
+      // either the sign bit in the shrunken mask is zero or the final test
+      // size is equal to the original compare size.
+
       if (isUInt<8>(Mask) &&
-          (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) {
+          (!(Mask & 0x80) || CmpVT == MVT::i8 ||
+           hasNoSignedComparisonUses(Node))) {
         // For example, convert "testl %eax, $8" to "testb %al, $8"
         VT = MVT::i8;
         SubRegOp = X86::sub_8bit;
         Op = X86::TEST8ri;
       } else if (OptForMinSize && isUInt<16>(Mask) &&
-                 (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+                 (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
+                  hasNoSignedComparisonUses(Node))) {
         // For example, "testl %eax, $32776" to "testw %ax, $32776".
         // NOTE: We only want to form TESTW instructions if optimizing for
         // min size. Otherwise we only save one byte and possibly get a length
@@ -3427,7 +3437,8 @@ void X86DAGToDAGISel::Select(SDNode *Nod
         SubRegOp = X86::sub_16bit;
         Op = X86::TEST16ri;
       } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
-                 (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
+                 (!(Mask & 0x80000000) || CmpVT == MVT::i32 ||
+                  hasNoSignedComparisonUses(Node))) {
         // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
         // NOTE: We only want to run that transform if N0 is 32 or 64 bits.
         // Otherwize, we find ourselves in a position where we have to do
@@ -3441,6 +3452,8 @@ void X86DAGToDAGISel::Select(SDNode *Nod
         break;
       }
 
+      // FIXME: We should be able to fold loads here.
+
       SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
       SDValue Reg = N0.getOperand(0);
 

Modified: llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll?rev=343498&r1=343497&r2=343498&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll Mon Oct  1 10:10:45 2018
@@ -12,8 +12,7 @@ define i32 @main() nounwind {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpq {{.*}}(%rip), %rax
 ; CHECK-NEXT:    sbbl %eax, %eax
-; CHECK-NEXT:    andl $150, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb $-106, %al
 ; CHECK-NEXT:    jle .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %if.then
 ; CHECK-NEXT:    movl $1, {{.*}}(%rip)

Modified: llvm/trunk/test/CodeGen/X86/test-shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/test-shrink.ll?rev=343498&r1=343497&r2=343498&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/test-shrink.ll (original)
+++ llvm/trunk/test/CodeGen/X86/test-shrink.ll Mon Oct  1 10:10:45 2018
@@ -578,8 +578,7 @@ no:
 define void @and16_trunc_8_sign(i16 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and16_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB13_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -592,8 +591,7 @@ define void @and16_trunc_8_sign(i16 %x)
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
 ; CHECK-WIN32-64-NEXT:    # kill: def $cx killed $cx def $ecx
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB13_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -604,8 +602,7 @@ define void @and16_trunc_8_sign(i16 %x)
 ; CHECK-X86-LABEL: and16_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
 ; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    andl $128, %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB13_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -626,8 +623,7 @@ no:
 define void @and32_trunc_8_sign(i32 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and32_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB14_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -639,8 +635,7 @@ define void @and32_trunc_8_sign(i32 %x)
 ; CHECK-WIN32-64-LABEL: and32_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB14_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -650,9 +645,8 @@ define void @and32_trunc_8_sign(i32 %x)
 ;
 ; CHECK-X86-LABEL: and32_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $128, %eax
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB14_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -673,8 +667,7 @@ no:
 define void @and64_trunc_8_sign(i64 %x) nounwind {
 ; CHECK-LINUX64-LABEL: and64_trunc_8_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $128, %edi
-; CHECK-LINUX64-NEXT:    testb %dil, %dil
+; CHECK-LINUX64-NEXT:    testb $-128, %dil
 ; CHECK-LINUX64-NEXT:    jg .LBB15_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -686,8 +679,7 @@ define void @and64_trunc_8_sign(i64 %x)
 ; CHECK-WIN32-64-LABEL: and64_trunc_8_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $128, %ecx
-; CHECK-WIN32-64-NEXT:    testb %cl, %cl
+; CHECK-WIN32-64-NEXT:    testb $-128, %cl
 ; CHECK-WIN32-64-NEXT:    jg .LBB15_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -697,9 +689,8 @@ define void @and64_trunc_8_sign(i64 %x)
 ;
 ; CHECK-X86-LABEL: and64_trunc_8_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $128, %eax
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testb $-128, %al
 ; CHECK-X86-NEXT:    jg .LBB15_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -720,8 +711,7 @@ no:
 define void @and32_trunc_16_sign(i32 %x) minsize nounwind {
 ; CHECK-LINUX64-LABEL: and32_trunc_16_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $32768, %edi # imm = 0x8000
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testw $-32768, %di # imm = 0x8000
 ; CHECK-LINUX64-NEXT:    jg .LBB16_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -733,8 +723,7 @@ define void @and32_trunc_16_sign(i32 %x)
 ; CHECK-WIN32-64-LABEL: and32_trunc_16_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $32768, %ecx # imm = 0x8000
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testw $-32768, %cx # imm = 0x8000
 ; CHECK-WIN32-64-NEXT:    jg .LBB16_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -744,9 +733,8 @@ define void @and32_trunc_16_sign(i32 %x)
 ;
 ; CHECK-X86-LABEL: and32_trunc_16_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testw $-32768, %ax # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB16_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar
@@ -767,8 +755,7 @@ no:
 define void @and64_trunc_32_sign(i64 %x) minsize nounwind {
 ; CHECK-LINUX64-LABEL: and64_trunc_32_sign:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $32768, %edi # imm = 0x8000
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testw $-32768, %di # imm = 0x8000
 ; CHECK-LINUX64-NEXT:    jg .LBB17_2
 ; CHECK-LINUX64-NEXT:  # %bb.1: # %yes
 ; CHECK-LINUX64-NEXT:    pushq %rax
@@ -780,8 +767,7 @@ define void @and64_trunc_32_sign(i64 %x)
 ; CHECK-WIN32-64-LABEL: and64_trunc_32_sign:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $32768, %ecx # imm = 0x8000
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testw $-32768, %cx # imm = 0x8000
 ; CHECK-WIN32-64-NEXT:    jg .LBB17_2
 ; CHECK-WIN32-64-NEXT:  # %bb.1: # %yes
 ; CHECK-WIN32-64-NEXT:    callq bar
@@ -791,9 +777,8 @@ define void @and64_trunc_32_sign(i64 %x)
 ;
 ; CHECK-X86-LABEL: and64_trunc_32_sign:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    movl $32768, %eax # imm = 0x8000
-; CHECK-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    testw $-32768, %ax # imm = 0x8000
 ; CHECK-X86-NEXT:    jg .LBB17_2
 ; CHECK-X86-NEXT:  # %bb.1: # %yes
 ; CHECK-X86-NEXT:    calll bar