[llvm] r344179 - [X86] Move X86DAGToDAGISel::matchBEXTRFromAnd() into X86ISelLowering

Wed Oct 10 13:40:12 PDT 2018

Author: lebedevri
Date: Wed Oct 10 13:40:12 2018
New Revision: 344179

URL: http://llvm.org/viewvc/llvm-project?rev=344179&view=rev
Log:
[X86] Move X86DAGToDAGISel::matchBEXTRFromAnd() into X86ISelLowering

Summary:
As discussed in [[ https://bugs.llvm.org/show_bug.cgi?id=38938 | PR38938 ]],
we fail to emit `BEXTR` if the mask is shifted.
We can't deal with that in `X86DAGToDAGISel` `before the address mode for the inc is selected`,
and we can't really do it in the normal DAGCombine, because we don't have generic `ISD::BitFieldExtract` node,
and if we simply turn the shifted mask into a normal mask + shift-left, it will be folded back.
So it would seem X86ISelLowering is the place to handle this.

This patch only moves the matchBEXTRFromAnd()
from X86DAGToDAGISel to X86ISelLowering.
It does not add support for the 'shifted mask' pattern.

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D52426

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/tbm_patterns.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=344179&r1=344178&r2=344179&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Oct 10 13:40:12 2018
@@ -457,7 +457,6 @@ namespace {
     }
 
     bool foldLoadStoreIntoMemOperand(SDNode *Node);
-    bool matchBEXTRFromAnd(SDNode *Node);
     bool shrinkAndImmediate(SDNode *N);
     bool isMaskZeroExtended(SDNode *N) const;
     bool tryShiftAmountMod(SDNode *N);
@@ -2582,69 +2581,6 @@ bool X86DAGToDAGISel::foldLoadStoreIntoM
   return true;
 }
 
-// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
-  MVT NVT = Node->getSimpleValueType(0);
-  SDLoc dl(Node);
-
-  SDValue N0 = Node->getOperand(0);
-  SDValue N1 = Node->getOperand(1);
-
-  // If we have TBM we can use an immediate for the control. If we have BMI
-  // we should only do this if the BEXTR instruction is implemented well.
-  // Otherwise moving the control into a register makes this more costly.
-  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
-  // hoisting the move immediate would make it worthwhile with a less optimal
-  // BEXTR?
-  if (!Subtarget->hasTBM() &&
-      !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
-    return false;
-
-  // Must have a shift right.
-  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
-    return false;
-
-  // Shift can't have additional users.
-  if (!N0->hasOneUse())
-    return false;
-
-  // Only supported for 32 and 64 bits.
-  if (NVT != MVT::i32 && NVT != MVT::i64)
-    return false;
-
-  // Shift amount and RHS of and must be constant.
-  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
-  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
-  if (!MaskCst || !ShiftCst)
-    return false;
-
-  // And RHS must be a mask.
-  uint64_t Mask = MaskCst->getZExtValue();
-  if (!isMask_64(Mask))
-    return false;
-
-  uint64_t Shift = ShiftCst->getZExtValue();
-  uint64_t MaskSize = countPopulation(Mask);
-
-  // Don't interfere with something that can be handled by extracting AH.
-  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
-  if (Shift == 8 && MaskSize == 8)
-    return false;
-
-  // Make sure we are only using bits that were in the original value, not
-  // shifted in.
-  if (Shift + MaskSize > NVT.getSizeInBits())
-    return false;
-
-  // Create a BEXTR node and run it through selection.
-  SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
-  SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
-                                N0->getOperand(0), C);
-  ReplaceNode(Node, New.getNode());
-  SelectCode(New.getNode());
-  return true;
-}
-
 // Emit a PCMISTR(I/M) instruction.
 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                              bool MayFoldLoad, const SDLoc &dl,
@@ -2952,8 +2888,6 @@ void X86DAGToDAGISel::Select(SDNode *Nod
     break;
 
   case ISD::AND:
-    if (matchBEXTRFromAnd(Node))
-      return;
     if (AndImmShrink && shrinkAndImmediate(Node))
       return;
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=344179&r1=344178&r2=344179&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Oct 10 13:40:12 2018
@@ -35278,6 +35278,69 @@ static SDValue combineAndLoadToBZHI(SDNo
   return SDValue();
 }
 
+static bool hasBEXTR(const X86Subtarget &Subtarget, EVT VT) {
+  // If we have TBM we can use an immediate for the control. If we have BMI
+  // we should only do this if the BEXTR instruction is implemented well.
+  // Otherwise moving the control into a register makes this more costly.
+  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+  // hoisting the move immediate would make it worthwhile with a less optimal
+  // BEXTR?
+  if (!Subtarget.hasTBM() && !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+    return false;
+  return (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
+}
+
+// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
+static SDValue combineAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  EVT NVT = Node->getValueType(0);
+  SDLoc dl(Node);
+
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  // Check if subtarget has BEXTR instruction for the node's type
+  if (!hasBEXTR(Subtarget, NVT))
+    return SDValue();
+
+  // Must have a shift right.
+  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
+    return SDValue();
+
+  // Shift can't have additional users.
+  if (!N0->hasOneUse())
+    return SDValue();
+
+  // Shift amount and RHS of and must be constant.
+  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+  if (!MaskCst || !ShiftCst)
+    return SDValue();
+
+  // And RHS must be a mask.
+  uint64_t Mask = MaskCst->getZExtValue();
+  if (!isMask_64(Mask))
+    return SDValue();
+
+  uint64_t Shift = ShiftCst->getZExtValue();
+  uint64_t MaskSize = countPopulation(Mask);
+
+  // Don't interfere with something that can be handled by extracting AH.
+  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
+  if (Shift == 8 && MaskSize == 8)
+    return SDValue();
+
+  // Make sure we are only using bits that were in the original value, not
+  // shifted in.
+  if (Shift + MaskSize > NVT.getSizeInBits())
+    return SDValue();
+
+  // Create a BEXTR node.
+  SDValue C = DAG.getConstant(Shift | (MaskSize << 8), dl, NVT);
+  SDValue New = DAG.getNode(X86ISD::BEXTR, dl, NVT, N0->getOperand(0), C);
+  return New;
+}
+
 // Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
 // Turn it into series of XORs and a setnp.
 static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
@@ -35379,6 +35442,9 @@ static SDValue combineAnd(SDNode *N, Sel
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
+  if (SDValue R = combineAndIntoBEXTR(N, DAG, Subtarget))
+    return R;
+
   if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
     return R;
 

Modified: llvm/trunk/test/CodeGen/X86/tbm_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/tbm_patterns.ll?rev=344179&r1=344178&r2=344179&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/tbm_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/tbm_patterns.ll Wed Oct 10 13:40:12 2018
@@ -53,8 +53,7 @@ define i32 @test_x86_tbm_bextri_u32_z2(i
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    shrl $4, %edi
-; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
   %t0 = lshr i32 %a, 4
@@ -114,8 +113,7 @@ define i64 @test_x86_tbm_bextri_u64_z2(i
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    shrl $4, %edi
-; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
   %t0 = lshr i64 %a, 4