[llvm] 5cd900c - [AArch64] Transform shift+and to shift+shift to select more shifted register
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 6 07:30:02 PST 2022
Author: chenglin.bi
Date: 2022-12-06T23:29:56+08:00
New Revision: 5cd900ce3c6edd9e0d9b163c9d6cf7f38688d2ee
URL: https://github.com/llvm/llvm-project/commit/5cd900ce3c6edd9e0d9b163c9d6cf7f38688d2ee
DIFF: https://github.com/llvm/llvm-project/commit/5cd900ce3c6edd9e0d9b163c9d6cf7f38688d2ee.diff
LOG: [AArch64] Transform shift+and to shift+shift to select more shifted register
and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D138904
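As a quick illustration (a standalone sketch, not part of the commit or its tests), the rewrite is a pure bit-manipulation identity. Using the shl constants from the eon_shiftedreg_from_and test below (shift by 36, mask 0xffe0000000000000, i.e. LowZBits = 53, MaskLen = 11):

  #include <cassert>
  #include <cstdint>

  // and (shl x, 36), 0xffe0000000000000  ==  shl (srl x, 17), 53
  // where c1 = LowZBits - ShiftAmtC = 53 - 36 = 17 and c2 = LowZBits = 53.
  uint64_t before(uint64_t x) { return (x << 36) & 0xffe0000000000000ULL; }
  uint64_t after(uint64_t x) { return (x >> 17) << 53; }

  int main() {
    for (uint64_t x : {0ULL, 1ULL, 0x123456789abcdef0ULL, ~0ULL})
      assert(before(x) == after(x));
  }

The second form is preferable on AArch64 because the final shl folds into the shifted-register operand of the using instruction (e.g. eon x0, x1, x8, lsl #53), saving the explicit and.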
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 5ed6293c4c1ab..436eff92f24ea 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -370,6 +370,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
+ bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm) {
return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
@@ -607,6 +608,84 @@ bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
return false;
}
+/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
+/// to select more shifted-register operands
+bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
+ SDValue &Shift) {
+ EVT VT = N.getValueType();
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ if (N->getOpcode() != ISD::AND || !N->hasOneUse())
+ return false;
+ SDValue LHS = N.getOperand(0);
+ if (!LHS->hasOneUse())
+ return false;
+
+ unsigned LHSOpcode = LHS->getOpcode();
+ if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
+ return false;
+
+ ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ if (!ShiftAmtNode)
+ return false;
+
+ uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
+ ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!RHSC)
+ return false;
+
+ APInt AndMask = RHSC->getAPIntValue();
+ unsigned LowZBits, MaskLen;
+ if (!AndMask.isShiftedMask(LowZBits, MaskLen))
+ return false;
+
+ unsigned BitWidth = N.getValueSizeInBits();
+ SDLoc DL(LHS);
+ uint64_t NewShiftC;
+ unsigned NewShiftOp;
+ if (LHSOpcode == ISD::SHL) {
+ // LowZBits <= ShiftAmtC is already handled by isBitfieldPositioningOp;
+ // BitWidth != LowZBits + MaskLen means the mask does not reach the top bit.
+ if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
+ return false;
+
+ NewShiftC = LowZBits - ShiftAmtC;
+ NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
+ } else {
+ if (LowZBits == 0)
+ return false;
+
+ // NewShiftC >= BitWidth is already handled by isBitfieldExtractOp.
+ NewShiftC = LowZBits + ShiftAmtC;
+ if (NewShiftC >= BitWidth)
+ return false;
+
+ // SRA sign-extends into the bits above the mask, so the mask must reach the top bit.
+ if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
+ return false;
+
+ // For SRL, the mask must cover every bit that can be nonzero after the rewrite.
+ if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
+ return false;
+
+ if (LHSOpcode == ISD::SRL)
+ NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
+ else
+ NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
+ }
+
+ assert(NewShiftC < BitWidth && "Invalid shift amount");
+ SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
+ SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
+ Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
+ NewShiftAmt, BitWidthMinus1),
+ 0);
+ unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
+ Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
+ return true;
+}
+
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
@@ -614,6 +693,9 @@ bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
SDValue &Reg, SDValue &Shift) {
+ if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
+ return true;
+
AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
if (ShType == AArch64_AM::InvalidShiftExtend)
return false;
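To see the constant derivation in the new SelectShiftedRegisterFromAnd concretely, here is a small standalone C++20 sketch of the srl path. It is an illustration only, not code from the patch: it uses std::countr_zero/std::popcount where the patch uses APInt::isShiftedMask, with the constants taken from the eor_shiftedreg_from_and test below, and variable names mirroring the patch.

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    // and (srl x, 23), 0x1ffff000000 from eor_shiftedreg_from_and.
    const uint64_t Mask = 0x1ffff000000ULL;
    const unsigned ShiftAmtC = 23, BitWidth = 64;

    // A shifted mask is a run of MaskLen ones above LowZBits zeros.
    const unsigned LowZBits = std::countr_zero(Mask); // 24
    const unsigned MaskLen = std::popcount(Mask);     // 17

    // The combined right-shift amount, as computed in the patch.
    const unsigned NewShiftC = LowZBits + ShiftAmtC;  // 47
    assert(NewShiftC < BitWidth);            // else isBitfieldExtractOp wins
    assert(NewShiftC + MaskLen >= BitWidth); // srl: nothing above the mask

    // Selected as: lsr x8, x0, #47 ; eor x0, x1, x8, lsl #24
    const uint64_t X = 0x0123456789abcdefULL;
    assert(((X >> ShiftAmtC) & Mask) == ((X >> NewShiftC) << LowZBits));
  }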
diff --git a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
index 4db550c35d829..91011ec66048f 100644
--- a/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
+++ b/llvm/test/CodeGen/AArch64/shiftregister-from-and.ll
@@ -37,9 +37,8 @@ define i64 @bic_shiftedreg_from_and(i64 %a, i64 %b) {
define i64 @eon_shiftedreg_from_and(i64 %a, i64 %b) {
; CHECK-LABEL: eon_shiftedreg_from_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl x8, x0, #36
-; CHECK-NEXT: and x8, x8, #0xffe0000000000000
-; CHECK-NEXT: eon x0, x8, x1
+; CHECK-NEXT: lsr x8, x0, #17
+; CHECK-NEXT: eon x0, x1, x8, lsl #53
; CHECK-NEXT: ret
%shl = shl i64 %a, 36
%and = and i64 %shl, -9007199254740992
@@ -53,9 +52,8 @@ define i64 @eon_shiftedreg_from_and(i64 %a, i64 %b) {
define i64 @eor_shiftedreg_from_and(i64 %a, i64 %b) {
; CHECK-LABEL: eor_shiftedreg_from_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr x8, x0, #23
-; CHECK-NEXT: and x8, x8, #0x1ffff000000
-; CHECK-NEXT: eor x0, x8, x1
+; CHECK-NEXT: lsr x8, x0, #47
+; CHECK-NEXT: eor x0, x1, x8, lsl #24
; CHECK-NEXT: ret
%lshr = lshr i64 %a, 23
%and = and i64 %lshr, 2199006478336
@@ -100,9 +98,8 @@ define i64 @orn_shiftedreg_from_and(i64 %a, i64 %b) {
define i64 @orr_shiftedreg_from_and(i64 %a, i64 %b) {
; CHECK-LABEL: orr_shiftedreg_from_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr x8, x0, #23
-; CHECK-NEXT: and x8, x8, #0x1ffff000000
-; CHECK-NEXT: orr x0, x8, x1
+; CHECK-NEXT: lsr x8, x0, #47
+; CHECK-NEXT: orr x0, x1, x8, lsl #24
; CHECK-NEXT: ret
%lshr = lshr i64 %a, 23
%and = and i64 %lshr, 2199006478336 ; 0x1ffff000000
@@ -116,9 +113,8 @@ define i64 @orr_shiftedreg_from_and(i64 %a, i64 %b) {
define i64 @orr_shiftedreg_from_and_mask2(i64 %a, i64 %b) {
; CHECK-LABEL: orr_shiftedreg_from_and_mask2:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsr x8, x0, #23
-; CHECK-NEXT: and x8, x8, #0x1ffff000000
-; CHECK-NEXT: orr x0, x8, x1
+; CHECK-NEXT: lsr x8, x0, #47
+; CHECK-NEXT: orr x0, x1, x8, lsl #24
; CHECK-NEXT: ret
%lshr = lshr i64 %a, 23
%and = and i64 %lshr, 4398029733888 ; 0x3ffff000000
@@ -132,9 +128,8 @@ define i64 @orr_shiftedreg_from_and_mask2(i64 %a, i64 %b) {
define i32 @add_shiftedreg_from_and(i32 %a, i32 %b) {
; CHECK-LABEL: add_shiftedreg_from_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: asr w8, w0, #3
-; CHECK-NEXT: and w8, w8, #0xff000000
-; CHECK-NEXT: add w0, w8, w1
+; CHECK-NEXT: asr w8, w0, #27
+; CHECK-NEXT: add w0, w1, w8, lsl #24
; CHECK-NEXT: ret
%ashr = ashr i32 %a, 3
%and = and i32 %ashr, -16777216
@@ -147,9 +142,8 @@ define i32 @add_shiftedreg_from_and(i32 %a, i32 %b) {
define i64 @sub_shiftedreg_from_and_shl(i64 %a, i64 %b) {
; CHECK-LABEL: sub_shiftedreg_from_and_shl:
; CHECK: // %bb.0:
-; CHECK-NEXT: lsl x8, x0, #36
-; CHECK-NEXT: and x8, x8, #0xffe0000000000000
-; CHECK-NEXT: sub x0, x1, x8
+; CHECK-NEXT: lsr x8, x0, #17
+; CHECK-NEXT: sub x0, x1, x8, lsl #53
; CHECK-NEXT: ret
%shl = shl i64 %a, 36
%and = and i64 %shl, -9007199254740992
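For the sra path (add_shiftedreg_from_and above), the extra BitWidth == LowZBits + MaskLen restriction is what keeps the result correct: every bit above the mask must be a copy of the sign bit, which the final lsl reproduces. A minimal self-check of that identity (assuming arithmetic behaviour of >> on signed values, which mainstream compilers provide and C++20 guarantees):

  #include <cassert>
  #include <cstdint>

  // and (sra w0, 3), 0xff000000  ==  shl (sra w0, 27), 24
  // where 27 = NewShiftC = LowZBits + ShiftAmtC = 24 + 3.
  uint32_t before(int32_t x) { return (uint32_t)(x >> 3) & 0xff000000u; }
  uint32_t after(int32_t x) { return (uint32_t)(x >> 27) << 24; }

  int main() {
    for (int32_t x : {0, -1, INT32_MIN, INT32_MAX, 0x12345678})
      assert(before(x) == after(x));
  }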