[llvm] 67e9151 - [x86] try harder to use shift instead of test if it can save some immediate bytes

Thu Mar 17 06:14:28 PDT 2022

Author: Sanjay Patel
Date: 2022-03-17T09:10:57-04:00
New Revision: 67e91510963a5307814aaf8463222635bddf1644

URL: https://github.com/llvm/llvm-project/commit/67e91510963a5307814aaf8463222635bddf1644
DIFF: https://github.com/llvm/llvm-project/commit/67e91510963a5307814aaf8463222635bddf1644.diff

LOG: [x86] try harder to use shift instead of test if it can save some immediate bytes

We favor 'and' and 'test' in earlier phases of optimization,
and that's usually the better option, but we can save a few
instruction bytes by converting a mask constant to a shift here.

Differential Revision: https://reviews.llvm.org/D121147

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/test/CodeGen/X86/cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index a02fdf33dff6b..7feef2a7b9651 100644

--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5614,10 +5614,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
       uint64_t Mask = MaskC->getZExtValue();
       Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
 
-      // Check if we can replace AND+IMM64 with a shift. This is possible for
-      // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
-      // flag.
-      if (CmpVT == MVT::i64 && !isInt<32>(Mask) && isShiftedMask_64(Mask) &&
+      // Check if we can replace AND+IMM{32,64} with a shift. This is possible
+      // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the
+      // zero flag.
+      if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
           onlyUsesZeroFlag(SDValue(Node, 0))) {
         unsigned ShiftOpcode = ISD::DELETED_NODE;
         unsigned ShiftAmt;
@@ -5626,7 +5626,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
         unsigned TestOpcode;
         unsigned LeadingZeros = countLeadingZeros(Mask);
         unsigned TrailingZeros = countTrailingZeros(Mask);
-        if (LeadingZeros == 0) {
+
+        // With leading/trailing zeros, the transform is profitable if we can
+        // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
+        // incurring any extra register moves.
+        bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
+        if (LeadingZeros == 0 && SavesBytes) {
           // If the mask covers the most significant bit, then we can replace
           // TEST+AND with a SHR and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
@@ -5634,7 +5639,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           ShiftAmt = TrailingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
-        } else if (TrailingZeros == 0) {
+        } else if (TrailingZeros == 0 && SavesBytes) {
           // If the mask covers the least significant bit, then we can replace
           // TEST+AND with a SHL and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
@@ -5642,9 +5647,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           ShiftAmt = LeadingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
-        } else if (MaskC->hasOneUse()) {
-          // If the mask is 8/16 or 32bits wide, then we can replace it with
-          // a SHR and a TEST8rr/TEST16rr/TEST32rr.
+        } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
+          // If the shifted mask extends into the high half and is 8/16/32 bits
+          // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
           unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
           if (PopCount == 8) {
             ShiftOpcode = X86::SHR64ri;

diff  --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index a94ad3cd82259..53766ae13c3fd 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -430,8 +430,7 @@ define i32 @highmask_i64_mask32(i64 %val) {
 ; CHECK-LABEL: highmask_i64_mask32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    testq $-1048576, %rdi # encoding: [0x48,0xf7,0xc7,0x00,0x00,0xf0,0xff]
-; CHECK-NEXT:    # imm = 0xFFF00000
+; CHECK-NEXT:    shrq $20, %rdi # encoding: [0x48,0xc1,0xef,0x14]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -1048576
@@ -502,8 +501,7 @@ define i32 @lowmask_i64_mask32(i64 %val) {
 ; CHECK-LABEL: lowmask_i64_mask32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
-; CHECK-NEXT:    # imm = 0xFFFFF
+; CHECK-NEXT:    shlq $44, %rdi # encoding: [0x48,0xc1,0xe7,0x2c]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 1048575