[llvm] r207702 - [ARM64] Prevent bit extraction to be adjusted by following shift

Wed Apr 30 14:07:24 PDT 2014

Author: weimingz
Date: Wed Apr 30 16:07:24 2014
New Revision: 207702

URL: http://llvm.org/viewvc/llvm-project?rev=207702&view=rev
Log:
[ARM64] Prevent bit extraction to be adjusted by following shift

For pattern like ((x >> C1) & Mask) << C2, DAG combiner may convert it
into (x >> (C1-C2)) & (Mask << C2), which makes pattern matching of ubfx
more difficult.
For example:
Given
  %shr = lshr i64 %x, 4
  %and = and i64 %shr, 15
  %arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, %i64 2, i64 %and
  %0 = load i64* %arrayidx
With current shift folding, it takes 3 instrs to compute base address:
  lsr x8, x0, #1
  and x8, x8, #0x78
  add x8, x9, x8

If using ubfx, it only needs 2 instrs:
  ubfx  x8, x0, #4, #4
  add x8, x9, x8, lsl #3

This fixes bug 19589

Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp
    llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.h
    llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=207702&r1=207701&r2=207702&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Wed Apr 30 16:07:24 2014
@@ -2017,6 +2017,15 @@ public:
   ///
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
+  /// Return true if it is profitable to move a following shift through this
+  //  node, adjusting any immediate operands as necessary to preserve semantics.
+  //  This transformation may not be desirable if it disrupts a particularly
+  //  auspicious target-specific tree (e.g. bitfield extractionon in AArch64).
+  //  By default, it returns true.
+  virtual bool isDesirableToCommuteWithShift(const SDNode *N /*Op*/) const {
+    return true;
+  }
+
   /// Return true if the target has native support for the specified value type
   /// and it is 'desirable' to use the type for the given node type. e.g. On x86
   /// i16 is legal, but undesirable since i16 instruction encodings are longer

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=207702&r1=207701&r2=207702&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Apr 30 16:07:24 2014
@@ -3867,6 +3867,9 @@ SDValue DAGCombiner::visitShiftByConstan
       return SDValue();
   }
 
+  if (!TLI.isDesirableToCommuteWithShift(LHS))
+    return SDValue();
+
   // Fold the constants, shifting the binop RHS by the shift amount.
   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                                N->getValueType(0),

Modified: llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp?rev=207702&r1=207701&r2=207702&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp Wed Apr 30 16:07:24 2014
@@ -5931,6 +5931,21 @@ ARM64TargetLowering::getScratchRegisters
   return ScratchRegs;
 }
 
+bool ARM64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const {
+  EVT VT = N->getValueType(0);
+    // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
+    // it with shift to let it be lowered to UBFX.
+  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
+      isa<ConstantSDNode>(N->getOperand(1))) {
+    uint64_t TruncMask = N->getConstantOperandVal(1);
+    if (isMask_64(TruncMask) &&
+      N->getOperand(0).getOpcode() == ISD::SRL &&
+      isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+      return false;
+  }
+  return true;
+}
+
 bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                             Type *Ty) const {
   assert(Ty->isIntegerTy());

Modified: llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.h?rev=207702&r1=207701&r2=207702&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.h Wed Apr 30 16:07:24 2014
@@ -284,6 +284,9 @@ public:
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
+  /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask.
+  bool isDesirableToCommuteWithShift(const SDNode *N) const override;
+
   /// \brief Returns true if it is beneficial to convert a load of a constant
   /// to just the constant itself.
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,

Modified: llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll?rev=207702&r1=207701&r2=207702&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/bitfield-extract.ll Wed Apr 30 16:07:24 2014
@@ -501,6 +501,19 @@ end:
   ret i80 %conv3
 }
 
+; Check if we can still catch UBFX when "AND" is used by SHL.
+; CHECK-LABEL: fct21:
+; CHECK: ubfx
+ at arr = external global [8 x [64 x i64]]
+define i64 @fct21(i64 %x) {
+entry:
+  %shr = lshr i64 %x, 4
+  %and = and i64 %shr, 15
+  %arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
+  %0 = load i64* %arrayidx, align 8
+  ret i64 %0
+}
+
 define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
 ; CHECK-LABEL: test_ignored_rightbits: