[llvm] r245169 - [X86] Widen the 'AND' mask if doing so shrinks the encoding size

Sat Aug 15 21:52:12 PDT 2015

Author: majnemer
Date: Sat Aug 15 23:52:11 2015
New Revision: 245169

URL: http://llvm.org/viewvc/llvm-project?rev=245169&view=rev
Log:
[X86] Widen the 'AND' mask if doing so shrinks the encoding size

We can set additional bits in a mask given that we know the other
operand of an AND already has some bits set to zero.  This can be more
efficient if doing so allows us to use an instruction which implicitly
sign extends the immediate.

This fixes PR24085.

Differential Revision: http://reviews.llvm.org/D11289

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/X86/shift-pair.ll
    llvm/trunk/test/CodeGen/X86/win64_frame.ll
    llvm/trunk/test/CodeGen/X86/zext-fold.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=245169&r1=245168&r2=245169&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Aug 15 23:52:11 2015
@@ -198,6 +198,7 @@ namespace {
     SDNode *Select(SDNode *N) override;
     SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
+    SDNode *SelectAndWithSExtImmediate(SDNode *Node, MVT NVT);
 
     bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
     bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -2208,6 +2209,57 @@ SDNode *X86DAGToDAGISel::SelectGather(SD
   return ResNode;
 }
 
+// Try to shrink an AND's encoding by setting extra mask bits (bits known to be
+// zero in the other operand) so the mask fits a sign-extended 8-bit immediate.
+// Returns the new AND32ri8/AND64ri8 node, or nullptr if this does not apply.
+SDNode *X86DAGToDAGISel::SelectAndWithSExtImmediate(SDNode *Node, MVT NVT) {
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  if (NVT != MVT::i32 && NVT != MVT::i64)
+    return nullptr;
+
+  auto *Cst = dyn_cast<ConstantSDNode>(N1);
+  if (!Cst)
+    return nullptr;
+
+  // As a heuristic, skip over zero and negative constants.  It turns out not
+  // to be productive to widen the mask for them.
+  int64_t Val = Cst->getSExtValue();
+  if (Val <= 0)
+    return nullptr;
+
+  // Limit ourselves to constants whose low byte already has its sign bit
+  // (bit 7) set, to save on compile time.
+  if ((int8_t)Val >= 0)
+    return nullptr;
+
+  unsigned Opc;
+  switch (NVT.SimpleTy) {
+  default:
+    llvm_unreachable("Unsupported VT!");
+  case MVT::i32:
+    Opc = X86::AND32ri8;
+    break;
+  case MVT::i64:
+    Opc = X86::AND64ri8;
+    break;
+  }
+
+  APInt Op0Zero, Op0One;
+  CurDAG->computeKnownBits(N0, Op0Zero, Op0One);
+  // Grow the mask: any bit known to be zero in N0 may be set in the mask.
+  Op0Zero |= Val;
+  // See if the widened mask fits in a sign-extended 8-bit immediate.
+  if (!Op0Zero.isSignedIntN(8))
+    return nullptr;
+
+  SDLoc DL(Node);
+  SDValue NewCst =
+      CurDAG->getTargetConstant(Op0Zero.getSExtValue(), DL, MVT::i8);
+  return CurDAG->getMachineNode(Opc, DL, NVT, N0, NewCst);
+}
+
 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opc, MOpc;
@@ -2223,7 +2275,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *
   }
 
   switch (Opcode) {
-  default: break;
+  default:
+    break;
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
@@ -2298,7 +2351,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *
       return RetVal;
     break;
   }
-  case ISD::AND:
+  case ISD::AND: {
+    if (SDNode *NewNode = SelectAndWithSExtImmediate(Node, NVT)) {
+      ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+      return nullptr;
+    }
+    // FALLTHROUGH
+  }
   case ISD::OR:
   case ISD::XOR: {
     // For operations of the form (x << C1) op C2, check if we can use a smaller

Modified: llvm/trunk/test/CodeGen/X86/shift-pair.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-pair.ll?rev=245169&r1=245168&r2=245169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-pair.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-pair.ll Sat Aug 15 23:52:11 2015
@@ -3,7 +3,7 @@
 define i64 @test(i64 %A) {
 ; CHECK: @test
 ; CHECK: shrq $54
-; CHECK: andl $1020
+; CHECK: andq $-4
 ; CHECK: ret
     %B = lshr i64 %A, 56
     %C = shl i64 %B, 2

Modified: llvm/trunk/test/CodeGen/X86/win64_frame.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/win64_frame.ll?rev=245169&r1=245168&r2=245169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/win64_frame.ll (original)
+++ llvm/trunk/test/CodeGen/X86/win64_frame.ll Sat Aug 15 23:52:11 2015
@@ -100,9 +100,8 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i
 
   alloca i32, i32 %a
   ; CHECK:        movl    %ecx, %eax
-  ; CHECK:        leaq    15(,%rax,4), %rcx
-  ; CHECK:        movabsq $34359738352, %rax
-  ; CHECK:        andq    %rcx, %rax
+  ; CHECK:        leaq    15(,%rax,4), %rax
+  ; CHECK:        andq    $-16, %rax
   ; CHECK:        callq   __chkstk
   ; CHECK:        subq    %rax, %rsp
 

Modified: llvm/trunk/test/CodeGen/X86/zext-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-fold.ll?rev=245169&r1=245168&r2=245169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/zext-fold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/zext-fold.ll Sat Aug 15 23:52:11 2015
@@ -8,7 +8,7 @@ define i32 @test1(i8 %x) nounwind readno
 }
 ; CHECK: test1
 ; CHECK: movzbl
-; CHECK-NEXT: andl {{.*}}224
+; CHECK-NEXT: andl {{.*}}-32
 
 ;; Multiple uses of %x but easily extensible.
 define i32 @test2(i8 %x) nounwind readnone {
@@ -21,7 +21,7 @@ define i32 @test2(i8 %x) nounwind readno
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: andl $224
+; CHECK: andl $-32
 ; CHECK: orl $63
 
 declare void @use(i32, i8)
@@ -36,6 +36,6 @@ define void @test3(i8 %x) nounwind readn
 ; CHECK: test3
 ; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
-; CHECK-NEXT: andl $224, [[REGISTER]]
+; CHECK-NEXT: andl $-32, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
 ; CHECK-NEXT: call{{.*}}use