[llvm] r349661 - [X86] Don't match TESTrr from (cmp (and X, Y), 0) during isel. Defer to post processing

Wed Dec 19 10:49:14 PST 2018

Author: ctopper
Date: Wed Dec 19 10:49:13 2018
New Revision: 349661

URL: http://llvm.org/viewvc/llvm-project?rev=349661&view=rev
Log:
[X86] Don't match TESTrr from (cmp (and X, Y), 0) during isel. Defer to post processing

The (cmp (and X, Y) 0) pattern is greedy and ends up forming a TESTrr and consuming the and when it might be better to use one of the BMI/TBM instructions like BLSR or BLSI.

This patch removes the pattern from isel and adds a post processing check to combine TESTrr+ANDrr into just a TESTrr. With this patch we are able to select the BMI/TBM instructions, but we'll also emit a TESTrr when the result is compared to 0. In many cases the peephole pass will be able to use optimizeCompareInstr to remove the TEST, but it's probably not perfect.

Differential Revision: https://reviews.llvm.org/D55870

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
    llvm/trunk/test/CodeGen/X86/bmi.ll
    llvm/trunk/test/CodeGen/X86/tbm_patterns.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=349661&r1=349660&r2=349661&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Dec 19 10:49:13 2018
@@ -898,9 +898,30 @@ void X86DAGToDAGISel::PostprocessISelDAG
       continue;
     }
 
-    // Attempt to remove vectors moves that were inserted to zero upper bits.
+    // Look for a TESTrr+ANDrr pattern where both operands of the test are
+    // the same. Rewrite to remove the AND.
+    unsigned Opc = N->getMachineOpcode();
+    if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
+         Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
+        N->getOperand(0) == N->getOperand(1) &&
+        N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+        N->getOperand(0).isMachineOpcode()) {
+      SDValue And = N->getOperand(0);
+      unsigned N0Opc = And.getMachineOpcode();
+      if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
+          N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
+        MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
+                                                     MVT::i32,
+                                                     And.getOperand(0),
+                                                     And.getOperand(1));
+        ReplaceUses(N, Test);
+        MadeChange = true;
+        continue;
+      }
+    }
 
-    if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
+    // Attempt to remove vectors moves that were inserted to zero upper bits.
+    if (Opc != TargetOpcode::SUBREG_TO_REG)
       continue;
 
     unsigned SubRegIdx = N->getConstantOperandVal(2);

Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrArithmetic.td?rev=349661&r1=349660&r2=349661&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Wed Dec 19 10:49:13 2018
@@ -1212,10 +1212,13 @@ def X86testpat : PatFrag<(ops node:$lhs,
 let isCompare = 1 in {
   let Defs = [EFLAGS] in {
     let isCommutable = 1 in {
-      def TEST8rr  : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
-      def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
-      def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
-      def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
+      // Avoid selecting these and instead use a test+and. Post processing will
+      // combine them. This gives bunch of other patterns that start with
+      // and a chance to match.
+      def TEST8rr  : BinOpRR_F<0x84, "test", Xi8 , null_frag>;
+      def TEST16rr : BinOpRR_F<0x84, "test", Xi16, null_frag>;
+      def TEST32rr : BinOpRR_F<0x84, "test", Xi32, null_frag>;
+      def TEST64rr : BinOpRR_F<0x84, "test", Xi64, null_frag>;
     } // isCommutable
 
     def TEST8mr    : BinOpMR_F<0x84, "test", Xi8 , X86testpat>;

Modified: llvm/trunk/test/CodeGen/X86/bmi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi.ll?rev=349661&r1=349660&r2=349661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi.ll Wed Dec 19 10:49:13 2018
@@ -519,10 +519,7 @@ define i32 @blsi32_z(i32 %a, i32 %b) nou
 define i32 @blsi32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; X86-LABEL: blsi32_z2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    negl %ecx
-; X86-NEXT:    testl %eax, %ecx
+; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovel %eax, %ecx
@@ -532,9 +529,7 @@ define i32 @blsi32_z2(i32 %a, i32 %b, i3
 ; X64-LABEL: blsi32_z2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    negl %ecx
-; X64-NEXT:    testl %edi, %ecx
+; X64-NEXT:    blsil %edi, %ecx
 ; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 0, %a
@@ -629,9 +624,7 @@ define i64 @blsi64_z2(i64 %a, i64 %b, i6
 ; X64-LABEL: blsi64_z2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    movq %rdi, %rcx
-; X64-NEXT:    negq %rcx
-; X64-NEXT:    testq %rdi, %rcx
+; X64-NEXT:    blsiq %rdi, %rcx
 ; X64-NEXT:    cmovneq %rdx, %rax
 ; X64-NEXT:    retq
   %t0 = sub i64 0, %a
@@ -875,9 +868,7 @@ define i32 @blsr32_z(i32 %a, i32 %b) nou
 define i32 @blsr32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; X86-LABEL: blsr32_z2:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal -1(%eax), %ecx
-; X86-NEXT:    testl %eax, %ecx
+; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    cmovel %eax, %ecx
@@ -887,9 +878,7 @@ define i32 @blsr32_z2(i32 %a, i32 %b, i3
 ; X64-LABEL: blsr32_z2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    leal -1(%rdi), %ecx
-; X64-NEXT:    testl %edi, %ecx
+; X64-NEXT:    blsrl %edi, %ecx
 ; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    retq
   %t0 = sub i32 %a, 1
@@ -984,8 +973,7 @@ define i64 @blsr64_z2(i64 %a, i64 %b, i6
 ; X64-LABEL: blsr64_z2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    leaq -1(%rdi), %rcx
-; X64-NEXT:    testq %rdi, %rcx
+; X64-NEXT:    blsrq %rdi, %rcx
 ; X64-NEXT:    cmovneq %rdx, %rax
 ; X64-NEXT:    retq
   %t0 = sub i64 %a, 1

Modified: llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm_patterns.ll?rev=349661&r1=349660&r2=349661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm_patterns.ll Wed Dec 19 10:49:13 2018
@@ -150,9 +150,7 @@ define i32 @test_x86_tbm_blcfill_u32_z2(
 ; CHECK-LABEL: test_x86_tbm_blcfill_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %ecx
-; CHECK-NEXT:    testl %edi, %ecx
+; CHECK-NEXT:    blcfilll %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = add i32 %a, 1
@@ -189,8 +187,7 @@ define i64 @test_x86_tbm_blcfill_u64_z2(
 ; CHECK-LABEL: test_x86_tbm_blcfill_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    leaq 1(%rdi), %rcx
-; CHECK-NEXT:    testq %rdi, %rcx
+; CHECK-NEXT:    blcfillq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = add i64 %a, 1
@@ -334,10 +331,7 @@ define i32 @test_x86_tbm_blcic_u32_z2(i3
 ; CHECK-LABEL: test_x86_tbm_blcic_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    notl %ecx
-; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    testl %ecx, %edi
+; CHECK-NEXT:    blcicl %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -377,10 +371,7 @@ define i64 @test_x86_tbm_blcic_u64_z2(i6
 ; CHECK-LABEL: test_x86_tbm_blcic_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    incq %rdi
-; CHECK-NEXT:    testq %rcx, %rdi
+; CHECK-NEXT:    blcicq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1
@@ -823,10 +814,7 @@ define i32 @test_x86_tbm_tzmsk_u32_z2(i3
 ; CHECK-LABEL: test_x86_tbm_tzmsk_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    notl %ecx
-; CHECK-NEXT:    decl %edi
-; CHECK-NEXT:    testl %edi, %ecx
+; CHECK-NEXT:    tzmskl %edi, %ecx
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = xor i32 %a, -1
@@ -866,10 +854,7 @@ define i64 @test_x86_tbm_tzmsk_u64_z2(i6
 ; CHECK-LABEL: test_x86_tbm_tzmsk_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    movq %rdi, %rcx
-; CHECK-NEXT:    notq %rcx
-; CHECK-NEXT:    decq %rdi
-; CHECK-NEXT:    testq %rdi, %rcx
+; CHECK-NEXT:    tzmskq %rdi, %rcx
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = xor i64 %a, -1