[llvm] [ARM] Move BFI creation to ISELDAGToDAG (PR #152200)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 13:34:44 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-arm
Author: AZero13 (AZero13)
This moves BFI creation from the DAG combiner into ISelDAGToDAG, which prevents a number of extraneous BFI nodes that do more harm than good, as can be seen in select-imm.ll.
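For context, here is a hypothetical sketch (not from the PR) of the kind of source pattern case (1) of the new matcher targets; assuming ARMv6T2+ codegen, the masked insert below may select to a single bfi once the constant is materialized:

```c++
// Hypothetical example. Case (1): or (and A, mask), val => BFI A, val, mask
unsigned insertLowByte(unsigned A) {
  // Clear bits [7:0] of A, then insert the constant 0x2A; the expected
  // lowering is roughly:  movs rV, #42 ; bfi rA, rV, #0, #8
  return (A & ~0xFFu) | 0x2Au;
}
```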
---
Full diff: https://github.com/llvm/llvm-project/pull/152200.diff
3 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (+165)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (-133)
- (modified) llvm/test/CodeGen/ARM/select-imm.ll (+14-40)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad46df159c20..68285f83c58c0 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -303,6 +303,7 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
+ bool tryV6T2BitfieldInsertOp(SDNode *N);
bool tryInsertVectorElt(SDNode *N);
@@ -3323,6 +3324,165 @@ bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}
+bool ARMDAGToDAGISel::tryV6T2BitfieldInsertOp(SDNode *N) {
+ if (!Subtarget->hasV6T2Ops())
+ return false;
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ unsigned MaskImm;
+ // Must be a single-use AND with an immediate operand.
+ if (!N0.hasOneUse() ||
+ !isOpcWithIntImmediate(N0.getNode(), ISD::AND, MaskImm))
+ return false;
+
+ // If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ if (MaskImm == 0xffff)
+ return false;
+
+ SDValue N00 = N0.getOperand(0);
+ unsigned Mask = MaskImm;
+
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N1C) {
+ unsigned Val = N1C->getZExtValue();
+ if ((Val & ~Mask) != Val)
+ return false;
+
+ if (ARM::isBitFieldInvertedMask(Mask)) {
+ Val >>= llvm::countr_zero(~Mask);
+
+ // Materialize the constant to be inserted
+ unsigned MovOpc = Subtarget->isThumb() ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ SDNode *ValNode = CurDAG->getMachineNode(
+ MovOpc, DL, VT, CurDAG->getTargetConstant(Val, DL, MVT::i32));
+
+ SDValue Ops[] = {N00, SDValue(ValNode, 0),
+ CurDAG->getTargetConstant(Mask, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ }
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C)
+ return false;
+ unsigned Mask2 = N11C->getZExtValue();
+
+ // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI
+ // pattern to match as-is.
+ if (ARM::isBitFieldInvertedMask(Mask) && (Mask == ~Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasDSP() && (Mask == 0xffff || Mask == 0xffff0000))
+ return false;
+ // 2a
+ unsigned amt = llvm::countr_zero(Mask2);
+
+ // Create machine shift instruction
+ SDNode *Shift;
+ if (Subtarget->isThumb()) {
+ // For Thumb2, use t2LSRri: Rm, imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N1.getOperand(0), CurDAG->getTargetConstant(amt, DL, MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::t2LSRri, DL, VT, ShiftOps);
+ } else {
+ // For ARM mode, use MOVsi: Rm, so_reg_imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N1.getOperand(0),
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsr, amt), DL,
+ MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::MOVsi, DL, VT, ShiftOps);
+ }
+
+ SDValue Ops[] = {N00, SDValue(Shift, 0),
+ CurDAG->getTargetConstant(Mask, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasDSP() && (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return false;
+ // 2b
+ unsigned lsb = llvm::countr_zero(Mask);
+
+ // Create machine shift instruction
+ SDNode *Shift;
+ if (Subtarget->isThumb()) {
+ // For Thumb2, use t2LSRri: Rm, imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N00, CurDAG->getTargetConstant(lsb, DL, MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::t2LSRri, DL, VT, ShiftOps);
+ } else {
+ // For ARM mode, use MOVsi: Rm, so_reg_imm, pred, pred_reg, cc_out
+ SDValue ShiftOps[] = {
+ N00,
+ CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsr, lsb), DL,
+ MVT::i32),
+ getAL(CurDAG, DL), // predicate
+ CurDAG->getRegister(0, MVT::i32), // pred_reg
+ CurDAG->getRegister(0, MVT::i32) // cc_out
+ };
+ Shift = CurDAG->getMachineNode(ARM::MOVsi, DL, VT, ShiftOps);
+ }
+
+ SDValue Ops[] = {N1.getOperand(0), SDValue(Shift, 0),
+ CurDAG->getTargetConstant(Mask2, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ }
+ }
+
+ // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+ // where lsb(mask) == #shamt and masked bits of B are known zero.
+ if (CurDAG->MaskedValueIsZero(N1, APInt(32, Mask)) &&
+ N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+ ARM::isBitFieldInvertedMask(~Mask)) {
+ SDValue ShAmt = N00.getOperand(1);
+ unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ unsigned LSB = llvm::countr_zero(Mask);
+ if (ShAmtC != LSB)
+ return false;
+
+ SDValue Ops[] = {N1, N00.getOperand(0),
+ CurDAG->getTargetConstant(~Mask, DL, MVT::i32),
+ getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32)};
+
+ unsigned Opc = Subtarget->isThumb() ? ARM::t2BFI : ARM::BFI;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+ }
+
+ return false;
+}
+
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (!Subtarget->hasV6T2Ops())
return false;
@@ -3833,6 +3993,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
}
break;
+ case ISD::OR: {
+ if (tryV6T2BitfieldInsertOp(N))
+ return;
+ break;
+ }
case ISD::AND: {
// Check for unsigned bitfield extract
if (tryV6T2BitfieldExtractOp(N, false))
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 7f8b4460bb814..5058a35930be3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -14579,132 +14579,6 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR,
return SDValue(OR, 0);
}
-static SDValue PerformORCombineToBFI(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
- // BFI is only available on V6T2+
- if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
- return SDValue();
-
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- // 1) or (and A, mask), val => ARMbfi A, val, mask
- // iff (val & mask) == val
- //
- // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
- // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
- // && mask == ~mask2
- // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
- // && ~mask == mask2
- // (i.e., copy a bitfield value into another bitfield of the same width)
-
- if (VT != MVT::i32)
- return SDValue();
-
- SDValue N00 = N0.getOperand(0);
-
- // The value and the mask need to be constants so we can verify this is
- // actually a bitfield set. If the mask is 0xffff, we can do better
- // via a movt instruction, so don't use BFI in that case.
- SDValue MaskOp = N0.getOperand(1);
- ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
- if (!MaskC)
- return SDValue();
- unsigned Mask = MaskC->getZExtValue();
- if (Mask == 0xffff)
- return SDValue();
- SDValue Res;
- // Case (1): or (and A, mask), val => ARMbfi A, val, mask
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N1C) {
- unsigned Val = N1C->getZExtValue();
- if ((Val & ~Mask) != Val)
- return SDValue();
-
- if (ARM::isBitFieldInvertedMask(Mask)) {
- Val >>= llvm::countr_zero(~Mask);
-
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
- DAG.getConstant(Val, DL, MVT::i32),
- DAG.getConstant(Mask, DL, MVT::i32));
-
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- }
- } else if (N1.getOpcode() == ISD::AND) {
- // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
- ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
- if (!N11C)
- return SDValue();
- unsigned Mask2 = N11C->getZExtValue();
-
- // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
- // as is to match.
- if (ARM::isBitFieldInvertedMask(Mask) &&
- (Mask == ~Mask2)) {
- // The pack halfword instruction works better for masks that fit it,
- // so use that when it's available.
- if (Subtarget->hasDSP() &&
- (Mask == 0xffff || Mask == 0xffff0000))
- return SDValue();
- // 2a
- unsigned amt = llvm::countr_zero(Mask2);
- Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
- DAG.getConstant(amt, DL, MVT::i32));
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
- DAG.getConstant(Mask, DL, MVT::i32));
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- } else if (ARM::isBitFieldInvertedMask(~Mask) &&
- (~Mask == Mask2)) {
- // The pack halfword instruction works better for masks that fit it,
- // so use that when it's available.
- if (Subtarget->hasDSP() &&
- (Mask2 == 0xffff || Mask2 == 0xffff0000))
- return SDValue();
- // 2b
- unsigned lsb = llvm::countr_zero(Mask);
- Res = DAG.getNode(ISD::SRL, DL, VT, N00,
- DAG.getConstant(lsb, DL, MVT::i32));
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
- DAG.getConstant(Mask2, DL, MVT::i32));
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- }
- }
-
- if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
- N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
- ARM::isBitFieldInvertedMask(~Mask)) {
- // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
- // where lsb(mask) == #shamt and masked bits of B are known zero.
- SDValue ShAmt = N00.getOperand(1);
- unsigned ShAmtC = ShAmt->getAsZExtVal();
- unsigned LSB = llvm::countr_zero(Mask);
- if (ShAmtC != LSB)
- return SDValue();
-
- Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
- DAG.getConstant(~Mask, DL, MVT::i32));
-
- DCI.CombineTo(N, Res, false);
- // Return value from the original node to inform the combiner than N is
- // now dead.
- return SDValue(N, 0);
- }
-
- return SDValue();
-}
-
static bool isValidMVECond(unsigned CC, bool IsFloat) {
switch (CC) {
case ARMCC::EQ:
@@ -14849,13 +14723,6 @@ static SDValue PerformORCombine(SDNode *N,
}
}
- // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
- // reasonable.
- if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
- if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
- return Res;
- }
-
if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
return Result;
diff --git a/llvm/test/CodeGen/ARM/select-imm.ll b/llvm/test/CodeGen/ARM/select-imm.ll
index 186276b50ceeb..b2e400f4ce923 100644
--- a/llvm/test/CodeGen/ARM/select-imm.ll
+++ b/llvm/test/CodeGen/ARM/select-imm.ll
@@ -696,27 +696,14 @@ define i1 @t11() {
; ARMT2: @ %bb.0: @ %entry
; ARMT2-NEXT: .pad #4
; ARMT2-NEXT: sub sp, sp, #4
-; ARMT2-NEXT: ldr r1, [sp]
-; ARMT2-NEXT: mov r0, #33
-; ARMT2-NEXT: movw r2, #39322
-; ARMT2-NEXT: movt r2, #6553
-; ARMT2-NEXT: bfi r1, r0, #0, #12
-; ARMT2-NEXT: mov r0, #10
-; ARMT2-NEXT: bfi r1, r0, #12, #13
-; ARMT2-NEXT: mov r0, r1
-; ARMT2-NEXT: bfc r0, #12, #20
-; ARMT2-NEXT: umull r2, r3, r0, r2
-; ARMT2-NEXT: add r2, r3, r3, lsl #2
-; ARMT2-NEXT: sub r0, r0, r2, lsl #1
-; ARMT2-NEXT: movw r2, #40960
-; ARMT2-NEXT: movt r2, #65024
-; ARMT2-NEXT: and r1, r1, r2
-; ARMT2-NEXT: orr r0, r1, r0
+; ARMT2-NEXT: ldr r0, [sp]
+; ARMT2-NEXT: movw r1, #40960
+; ARMT2-NEXT: movt r1, #65024
+; ARMT2-NEXT: orr r0, r0, #40960
+; ARMT2-NEXT: and r0, r0, r1
+; ARMT2-NEXT: orr r0, r0, #3
; ARMT2-NEXT: str r0, [sp]
-; ARMT2-NEXT: and r0, r0, #15
-; ARMT2-NEXT: sub r0, r0, #3
-; ARMT2-NEXT: clz r0, r0
-; ARMT2-NEXT: lsr r0, r0, #5
+; ARMT2-NEXT: mov r0, #1
; ARMT2-NEXT: add sp, sp, #4
; ARMT2-NEXT: bx lr
;
@@ -758,27 +745,14 @@ define i1 @t11() {
; THUMB2: @ %bb.0: @ %entry
; THUMB2-NEXT: .pad #4
; THUMB2-NEXT: sub sp, #4
-; THUMB2-NEXT: ldr r1, [sp]
-; THUMB2-NEXT: movs r0, #33
-; THUMB2-NEXT: movw r2, #39322
-; THUMB2-NEXT: bfi r1, r0, #0, #12
-; THUMB2-NEXT: movs r0, #10
-; THUMB2-NEXT: bfi r1, r0, #12, #13
-; THUMB2-NEXT: mov r0, r1
-; THUMB2-NEXT: movt r2, #6553
-; THUMB2-NEXT: bfc r0, #12, #20
-; THUMB2-NEXT: umull r2, r3, r0, r2
-; THUMB2-NEXT: add.w r2, r3, r3, lsl #2
-; THUMB2-NEXT: sub.w r0, r0, r2, lsl #1
-; THUMB2-NEXT: movw r2, #40960
-; THUMB2-NEXT: movt r2, #65024
-; THUMB2-NEXT: ands r1, r2
-; THUMB2-NEXT: orrs r0, r1
+; THUMB2-NEXT: ldr r0, [sp]
+; THUMB2-NEXT: movw r1, #40960
+; THUMB2-NEXT: movt r1, #65024
+; THUMB2-NEXT: orr r0, r0, #40960
+; THUMB2-NEXT: ands r0, r1
+; THUMB2-NEXT: adds r0, #3
; THUMB2-NEXT: str r0, [sp]
-; THUMB2-NEXT: and r0, r0, #15
-; THUMB2-NEXT: subs r0, #3
-; THUMB2-NEXT: clz r0, r0
-; THUMB2-NEXT: lsrs r0, r0, #5
+; THUMB2-NEXT: movs r0, #1
; THUMB2-NEXT: add sp, #4
; THUMB2-NEXT: bx lr
;
``````````
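Cases (2) and (3) of the new matcher correspond to source patterns along these lines (hypothetical illustrations; the exact DAG shape the selector sees depends on earlier combines):

```c++
// Case (2): or (and A, mask), (and B, mask2) => BFI A, (lsr B, amt), mask
// Copies the 8-bit field in bits [11:4] of B into the same field of A
// (mask == ~mask2), so this can become:  lsr rT, rB, #4 ; bfi rA, rT, #4, #8
unsigned copyField(unsigned A, unsigned B) {
  return (A & 0xFFFFF00Fu) | (B & 0x00000FF0u);
}

// Case (3): or (and (shl A, #shamt), mask), B => BFI B, A, ~mask
// Legal here because bits [23:8] of B are known zero and shamt == lsb(mask),
// so this can become:  bfi rB, rA, #8, #16
unsigned insertShifted(unsigned A, unsigned B) {
  return ((A << 8) & 0x00FFFF00u) | (B & 0xFF0000FFu);
}
```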
https://github.com/llvm/llvm-project/pull/152200