[PATCH] D121147: [x86] try harder to use shift instead of test if it can save some immediate bytes

Wed Mar 16 10:36:58 PDT 2022

spatel updated this revision to Diff 415899.
spatel added a comment.

Patch updated:
Rebased now that D121320 <https://reviews.llvm.org/D121320> has landed. The checks are adjusted to fit the new code structure. We have negative tests in place to verify that we don't enable any size-increasing transforms.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121147/new/

https://reviews.llvm.org/D121147

Files:
  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
  llvm/test/CodeGen/X86/cmp.ll


Index: llvm/test/CodeGen/X86/cmp.ll
===================================================================

--- llvm/test/CodeGen/X86/cmp.ll
+++ llvm/test/CodeGen/X86/cmp.ll
@@ -430,8 +430,7 @@
 ; CHECK-LABEL: highmask_i64_mask32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    testq $-1048576, %rdi # encoding: [0x48,0xf7,0xc7,0x00,0x00,0xf0,0xff]
-; CHECK-NEXT:    # imm = 0xFFF00000
+; CHECK-NEXT:    shrq $20, %rdi # encoding: [0x48,0xc1,0xef,0x14]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -1048576
@@ -502,8 +501,7 @@
 ; CHECK-LABEL: lowmask_i64_mask32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT:    testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
-; CHECK-NEXT:    # imm = 0xFFFFF
+; CHECK-NEXT:    shlq $44, %rdi # encoding: [0x48,0xc1,0xe7,0x2c]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 1048575
Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5614,10 +5614,10 @@
       uint64_t Mask = MaskC->getZExtValue();
       Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
 
-      // Check if we can replace AND+IMM64 with a shift. This is possible for
-      // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
-      // flag.
-      if (CmpVT == MVT::i64 && !isInt<32>(Mask) && isShiftedMask_64(Mask) &&
+      // Check if we can replace AND+IMM{32,64} with a shift. This is possible
+      // for masks like 0xFF000000 or 0x00FFFFFF and if we care only about the
+      // zero flag.
+      if (CmpVT == MVT::i64 && !isInt<8>(Mask) && isShiftedMask_64(Mask) &&
           onlyUsesZeroFlag(SDValue(Node, 0))) {
         unsigned ShiftOpcode = ISD::DELETED_NODE;
         unsigned ShiftAmt;
@@ -5626,7 +5626,12 @@
         unsigned TestOpcode;
         unsigned LeadingZeros = countLeadingZeros(Mask);
         unsigned TrailingZeros = countTrailingZeros(Mask);
-        if (LeadingZeros == 0) {
+
+        // With leading/trailing zeros, the transform is profitable if we can
+        // eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
+        // incurring any extra register moves.
+        bool SavesBytes = !isInt<32>(Mask) || N0.getOperand(0).hasOneUse();
+        if (LeadingZeros == 0 && SavesBytes) {
           // If the mask covers the most significant bit, then we can replace
           // TEST+AND with a SHR and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
@@ -5634,7 +5639,7 @@
           ShiftAmt = TrailingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
-        } else if (TrailingZeros == 0) {
+        } else if (TrailingZeros == 0 && SavesBytes) {
           // If the mask covers the least significant bit, then we can replace
           // TEST+AND with a SHL and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
@@ -5642,9 +5647,9 @@
           ShiftAmt = LeadingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
-        } else if (MaskC->hasOneUse()) {
-          // If the mask is 8/16 or 32bits wide, then we can replace it with
-          // a SHR and a TEST8rr/TEST16rr/TEST32rr.
+        } else if (MaskC->hasOneUse() && !isInt<32>(Mask)) {
+          // If the shifted mask extends into the high half and is 8/16/32 bits
+          // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
           unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
           if (PopCount == 8) {
             ShiftOpcode = X86::SHR64ri;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D121147.415899.patch
Type: text/x-patch
Size: 3907 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220316/ff1b3b27/attachment-0001.bin>