[llvm] r323690 - [X86] Avoid using high register trick for test instruction

Mon Jan 29 12:54:33 PST 2018

Author: deadalnix
Date: Mon Jan 29 12:54:33 2018
New Revision: 323690

URL: http://llvm.org/viewvc/llvm-project?rev=323690&view=rev
Log:
[X86] Avoid using high register trick for test instruction

Summary:
It seems it's main effect is to create addition copies when values are inr register that do not support this trick, which increase register pressure and makes the code bigger.

The main noteworthy regression I was able to observe was pattern of the type (setcc (trunc (and X, C)), 0) where C is such as it would benefit from the hi register trick. To prevent this, a new pattern is added to materialize such pattern using a 32 bits test. This has the added benefit of working with any constant that is materializable as a 32bits immediate, not just the ones that can leverage the high register trick, as demonstrated by the test case in test-shrink.ll using the constant 2049 .

Reviewers: craig.topper, niravd, spatel, hfinkel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42646

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
    llvm/trunk/test/CodeGen/X86/test-shrink.ll
    llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll
    llvm/trunk/test/CodeGen/X86/vastart-defs-eflags.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Mon Jan 29 12:54:33 2018
@@ -3073,67 +3073,33 @@ void X86DAGToDAGISel::Select(SDNode *Nod
         return;
       }
 
-      // For example, "testl %eax, $2048" to "testb %ah, $8".
-      if (isShiftedUInt<8, 8>(Mask) &&
-          (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
-        // Shift the immediate right by 8 bits.
-        SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8);
-        SDValue Reg = N0.getOperand(0);
-
-        // Extract the h-register.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
-                                                        MVT::i8, Reg);
-
-        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
-        // target GR8_NOREX registers, so make sure the register class is
-        // forced.
-        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
-                                                 MVT::i32, Subreg, ShiftedImm);
-        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
-        // one, do not call ReplaceAllUsesWith.
-        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
-                    SDValue(NewNode, 0));
-        CurDAG->RemoveDeadNode(Node);
-        return;
-      }
-
-      // For example, "testl %eax, $32776" to "testw %ax, $32776".
-      // NOTE: We only want to form TESTW instructions if optimizing for
-      // min size. Otherwise we only save one byte and possibly get a length
-      // changing prefix penalty in the decoders.
-      if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&
-          (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
-        SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);
-        SDValue Reg = N0.getOperand(0);
-
-        // Extract the 16-bit subregister.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
-                                                        MVT::i16, Reg);
-
-        // Emit a testw.
-        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
-                                                 Subreg, Imm);
-        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
-        // one, do not call ReplaceAllUsesWith.
-        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
-                    SDValue(NewNode, 0));
-        CurDAG->RemoveDeadNode(Node);
-        return;
-      }
-
       // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
-      if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&
+      if (isUInt<32>(Mask) &&
           (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) {
-        SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
+        MVT VT = MVT::i32;
+        int SubRegOp = X86::sub_32bit;
+        unsigned Op = X86::TEST32ri;
+
+        // For example, "testl %eax, $32776" to "testw %ax, $32776".
+        // NOTE: We only want to form TESTW instructions if optimizing for
+        // min size. Otherwise we only save one byte and possibly get a length
+        // changing prefix penalty in the decoders.
+        if (OptForMinSize && isUInt<16>(Mask) &&
+            (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) {
+          VT = MVT::i16;
+          SubRegOp = X86::sub_16bit;
+          Op = X86::TEST16ri;
+        }
+
+        SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
         SDValue Reg = N0.getOperand(0);
 
-        // Extract the 32-bit subregister.
-        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
-                                                        MVT::i32, Reg);
+        // Extract the subregister if necessary.
+        if (N0.getValueType() != VT)
+          Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
 
-        // Emit a testl.
-        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
-                                                 Subreg, Imm);
+        // Emit a testl or testw.
+        SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm);
         // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
         // one, do not call ReplaceAllUsesWith.
         ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),

Modified: llvm/trunk/lib/Target/X86/X86InstrArithmetic.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrArithmetic.td?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrArithmetic.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrArithmetic.td Mon Jan 29 12:54:33 2018
@@ -1257,14 +1257,6 @@ let isCompare = 1 in {
     def TEST32mi   : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;
     let Predicates = [In64BitMode] in
     def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;
-
-    // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
-    // register class is constrained to GR8_NOREX. This pseudo is explicitly
-    // marked side-effect free, since it doesn't have an isel pattern like
-    // other test instructions.
-    let isPseudo = 1, hasSideEffects = 0 in
-    def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
-                          "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
   } // Defs = [EFLAGS]
 
   def TEST8i8    : BinOpAI_F<0xA8, "test", Xi8 , AL,

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jan 29 12:54:33 2018
@@ -8018,9 +8018,6 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
   case X86::VMOVUPSZ256mr_NOVLX:
     return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
                             get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
-  case X86::TEST8ri_NOREX:
-    MI.setDesc(get(X86::TEST8ri));
-    return true;
   case X86::MOV32ri64:
     MI.setDesc(get(X86::MOV32ri));
     return true;

Modified: llvm/trunk/lib/Target/X86/X86MacroFusion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MacroFusion.cpp?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MacroFusion.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MacroFusion.cpp Mon Jan 29 12:54:33 2018
@@ -86,7 +86,6 @@ static bool shouldScheduleAdjacent(const
   case X86::TEST16mr:
   case X86::TEST32mr:
   case X86::TEST64mr:
-  case X86::TEST8ri_NOREX:
   case X86::AND16i16:
   case X86::AND16ri:
   case X86::AND16ri8:

Modified: llvm/trunk/test/CodeGen/X86/test-shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/test-shrink.ll?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/test-shrink.ll (original)
+++ llvm/trunk/test/CodeGen/X86/test-shrink.ll Mon Jan 29 12:54:33 2018
@@ -484,8 +484,7 @@ no:
 define void @truncand32(i16 inreg %x) nounwind {
 ; CHECK-LINUX64-LABEL: truncand32:
 ; CHECK-LINUX64:       # %bb.0:
-; CHECK-LINUX64-NEXT:    andl $2049, %edi # imm = 0x801
-; CHECK-LINUX64-NEXT:    testw %di, %di
+; CHECK-LINUX64-NEXT:    testl $2049, %edi # imm = 0x801
 ; CHECK-LINUX64-NEXT:    je .LBB11_1
 ; CHECK-LINUX64-NEXT:  # %bb.2: # %no
 ; CHECK-LINUX64-NEXT:    retq
@@ -498,8 +497,7 @@ define void @truncand32(i16 inreg %x) no
 ; CHECK-WIN32-64-LABEL: truncand32:
 ; CHECK-WIN32-64:       # %bb.0:
 ; CHECK-WIN32-64-NEXT:    subq $40, %rsp
-; CHECK-WIN32-64-NEXT:    andl $2049, %ecx # imm = 0x801
-; CHECK-WIN32-64-NEXT:    testw %cx, %cx
+; CHECK-WIN32-64-NEXT:    testl $2049, %ecx # imm = 0x801
 ; CHECK-WIN32-64-NEXT:    je .LBB11_1
 ; CHECK-WIN32-64-NEXT:  # %bb.2: # %no
 ; CHECK-WIN32-64-NEXT:    addq $40, %rsp
@@ -511,8 +509,7 @@ define void @truncand32(i16 inreg %x) no
 ;
 ; CHECK-X86-LABEL: truncand32:
 ; CHECK-X86:       # %bb.0:
-; CHECK-X86-NEXT:    andl $2049, %eax # imm = 0x801
-; CHECK-X86-NEXT:    testw %ax, %ax
+; CHECK-X86-NEXT:    testl $2049, %eax # imm = 0x801
 ; CHECK-X86-NEXT:    je .LBB11_1
 ; CHECK-X86-NEXT:  # %bb.2: # %no
 ; CHECK-X86-NEXT:    retl

Modified: llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll (original)
+++ llvm/trunk/test/CodeGen/X86/testb-je-fusion.ll Mon Jan 29 12:54:33 2018
@@ -6,9 +6,8 @@
 define i32 @check_flag(i32 %flags, ...) nounwind {
 ; CHECK-LABEL: check_flag:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %ecx
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $2, %ch
+; CHECK-NEXT:    testl $512, %edi # imm = 0x200
 ; CHECK-NEXT:    je .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %if.then
 ; CHECK-NEXT:    movl $1, %eax

Modified: llvm/trunk/test/CodeGen/X86/vastart-defs-eflags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vastart-defs-eflags.ll?rev=323690&r1=323689&r2=323690&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vastart-defs-eflags.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vastart-defs-eflags.ll Mon Jan 29 12:54:33 2018
@@ -8,9 +8,7 @@ target triple = "x86_64-apple-macosx10.1
 define i32 @check_flag(i32 %flags, ...) nounwind {
 ; CHECK-LABEL: check_flag:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    subq $48, %rsp
-; CHECK-NEXT:    movl %edi, %ebx
+; CHECK-NEXT:    subq $56, %rsp
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_2
 ; CHECK-NEXT:  ## %bb.1: ## %entry
@@ -29,7 +27,7 @@ define i32 @check_flag(i32 %flags, ...)
 ; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $2, %bh
+; CHECK-NEXT:    testl $512, %edi ## imm = 0x200
 ; CHECK-NEXT:    je LBB0_4
 ; CHECK-NEXT:  ## %bb.3: ## %if.then
 ; CHECK-NEXT:    leaq -{{[0-9]+}}(%rsp), %rax
@@ -40,8 +38,7 @@ define i32 @check_flag(i32 %flags, ...)
 ; CHECK-NEXT:    movl $8, 0
 ; CHECK-NEXT:    movl $1, %eax
 ; CHECK-NEXT:  LBB0_4: ## %if.end
-; CHECK-NEXT:    addq $48, %rsp
-; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    addq $56, %rsp
 ; CHECK-NEXT:    retq
 entry:
   %and = and i32 %flags, 512