[llvm] r270846 - [AArch64] Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm'.
Chad Rosier via llvm-commits
llvm-commits at lists.llvm.org
Thu May 26 06:27:57 PDT 2016
Author: mcrosier
Date: Thu May 26 08:27:56 2016
New Revision: 270846
URL: http://llvm.org/viewvc/llvm-project?rev=270846&view=rev
Log:
[AArch64] Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm'.
If and only if the value being inserted sets only known zero bits.
This combine transforms things like
and w8, w0, #0xfffffff0
movz w9, #5
orr w0, w8, w9
into
movz w8, #5
bfxil w0, w8, #0, #4
The combine is tuned to make sure we always reduce the number of instructions.
We avoid churning code for what are expected to be performance-neutral changes
(e.g., converting AND+OR to OR+BFI).
Differential Revision: http://reviews.llvm.org/D20387
Modified:
llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp?rev=270846&r1=270845&r2=270846&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp Thu May 26 08:27:56 2016
@@ -1981,6 +1981,97 @@ static bool isShiftedMask(uint64_t Mask,
return isShiftedMask_64(Mask);
}
+// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
+// inserted only sets known zero bits.
+// Returns true after replacing N with a BFM machine node (plus a constant
+// materialization for the inserted value); returns false to leave N untouched.
+static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
+ assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
+
+ // Only the two native GPR widths are handled.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return false;
+
+ unsigned BitWidth = VT.getSizeInBits();
+
+ // The OR's RHS must be a constant immediate.
+ uint64_t OrImm;
+ if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
+ return false;
+
+ // Skip this transformation if the ORR immediate can be encoded in the ORR.
+ // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
+ // performance neutral.
+ if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
+ return false;
+
+ uint64_t MaskImm;
+ SDValue And = N->getOperand(0);
+ // Must be a single use AND with an immediate operand.
+ if (!And.hasOneUse() ||
+ !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
+ return false;
+
+ // Compute the Known Zero for the AND as this allows us to catch more general
+ // cases than just looking for AND with imm.
+ APInt KnownZero, KnownOne;
+ CurDAG->computeKnownBits(And, KnownZero, KnownOne);
+
+ // Non-zero in the sense that they're not provably zero, which is the key
+ // point if we want to use this value.
+ uint64_t NotKnownZero = (~KnownZero).getZExtValue();
+
+ // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
+ // A shifted mask of known-zero bits is exactly the contiguous field a single
+ // BFI/BFXIL can fill.
+ if (!isShiftedMask(KnownZero.getZExtValue(), VT))
+ return false;
+
+ // The bits being inserted must only set those bits that are known to be zero.
+ if ((OrImm & NotKnownZero) != 0) {
+ // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
+ // currently handle this case.
+ return false;
+ }
+
+ // BFI/BFXIL dst, src, #lsb, #width.
+ // LSB is the index of the lowest known-zero bit; Width is the size of the
+ // contiguous known-zero field (well-defined because KnownZero is a shifted
+ // mask, checked above).
+ int LSB = countTrailingOnes(NotKnownZero);
+ int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
+
+ // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
+ unsigned ImmR = (BitWidth - LSB) % BitWidth;
+ unsigned ImmS = Width - 1;
+
+ // If we're creating a BFI instruction avoid cases where we need more
+ // instructions to materialize the BFI constant as compared to the original
+ // ORR. A BFXIL will use the same constant as the original ORR, so the code
+ // should be no worse in this case.
+ bool IsBFI = LSB != 0;
+ uint64_t BFIImm = OrImm >> LSB;
+ if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
+ // We have a BFI instruction and we know the constant can't be materialized
+ // with a ORR-immediate with the zero register.
+ // Count non-zero 16-bit chunks: each one costs one instruction (MOVZ/MOVK)
+ // to materialize, so the BFI form must not need more chunks than the
+ // original ORR constant did.
+ unsigned OrChunks = 0, BFIChunks = 0;
+ for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
+ if (((OrImm >> Shift) & 0xFFFF) != 0)
+ ++OrChunks;
+ if (((BFIImm >> Shift) & 0xFFFF) != 0)
+ ++BFIChunks;
+ }
+ if (BFIChunks > OrChunks)
+ return false;
+ }
+
+ // Materialize the constant to be inserted.
+ SDLoc DL(N);
+ unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
+ SDNode *MOVI = CurDAG->getMachineNode(
+ MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
+
+ // Create the BFI/BFXIL instruction.
+ // Operands: the value whose field is preserved (the AND's input), the
+ // materialized constant to insert, then the BFM immr/imms immediates.
+ SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
+ CurDAG->getTargetConstant(ImmR, DL, VT),
+ CurDAG->getTargetConstant(ImmS, DL, VT)};
+ unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
+ CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+ return true;
+}
+
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
SelectionDAG *CurDAG) {
assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
@@ -2159,7 +2250,10 @@ bool AArch64DAGToDAGISel::tryBitfieldIns
return true;
}
- return tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG);
+ if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
+ return true;
+
+ return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
}
/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
Modified: llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll?rev=270846&r1=270845&r2=270846&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/bitfield-insert.ll Thu May 26 08:27:56 2016
@@ -378,3 +378,88 @@ entry:
%or = or i32 %and, %and1
ret i32 %or
}
+
+; The OR immediate (5) lies entirely within the low 4 bits cleared by the AND,
+; so it is inserted at bit 0 with a BFXIL.
+; CHECK-LABEL: @test1
+; CHECK: movz [[REG:w[0-9]+]], #5
+; CHECK: bfxil w0, [[REG]], #0, #4
+define i32 @test1(i32 %a) {
+ %1 = and i32 %a, -16 ; 0xfffffff0
+ %2 = or i32 %1, 5 ; 0x00000005
+ ret i32 %2
+}
+
+; The AND clears a 4-bit field at bit 22, so the shifted OR constant
+; (0x06400000 >> 22 == 10) is inserted with a BFI at lsb 22.
+; CHECK-LABEL: @test2
+; CHECK: movz [[REG:w[0-9]+]], #10
+; CHECK: bfi w0, [[REG]], #22, #4
+define i32 @test2(i32 %a) {
+ %1 = and i32 %a, -62914561 ; 0xfc3fffff
+ %2 = or i32 %1, 41943040 ; 0x06400000
+ ret i32 %2
+}
+
+; 64-bit variant of the BFXIL case: the constant fits in the cleared low
+; 3 bits, so insert at bit 0.
+; CHECK-LABEL: @test3
+; CHECK: movz [[REG:x[0-9]+]], #5
+; CHECK: bfxil x0, [[REG]], #0, #3
+define i64 @test3(i64 %a) {
+ %1 = and i64 %a, -8 ; 0xfffffffffffffff8
+ %2 = or i64 %1, 5 ; 0x0000000000000005
+ ret i64 %2
+}
+
+; 64-bit BFI: the AND clears bits [7:1], so the shifted constant (18 >> 1 == 9)
+; is inserted at lsb 1 with width 7.
+; CHECK-LABEL: @test4
+; CHECK: movz [[REG:x[0-9]+]], #9
+; CHECK: bfi x0, [[REG]], #1, #7
+define i64 @test4(i64 %a) {
+ %1 = and i64 %a, -255 ; 0xffffffffffffff01
+ %2 = or i64 %1, 18 ; 0x0000000000000012
+ ret i64 %2
+}
+
+; Don't generate BFI/BFXIL if the immediate can be encoded in the ORR.
+; (0x6 is a valid AArch64 logical immediate, so AND+ORR is already optimal.)
+; CHECK-LABEL: @test5
+; CHECK: and [[REG:w[0-9]+]], w0, #0xfffffff0
+; CHECK: orr w0, [[REG]], #0x6
+define i32 @test5(i32 %a) {
+ %1 = and i32 %a, 4294967280 ; 0xfffffff0
+ %2 = or i32 %1, 6 ; 0x00000006
+ ret i32 %2
+}
+
+; BFXIL will use the same constant as the ORR, so we don't care how the constant
+; is materialized (it's an equal cost either way).
+; (0x000b5ad2 needs MOVZ+MOVK regardless of this combine firing.)
+; CHECK-LABEL: @test6
+; CHECK: movz [[REG:w[0-9]+]], #11, lsl #16
+; CHECK: movk [[REG]], #23250
+; CHECK: bfxil w0, [[REG]], #0, #20
+define i32 @test6(i32 %a) {
+ %1 = and i32 %a, 4293918720 ; 0xfff00000
+ %2 = or i32 %1, 744146 ; 0x000b5ad2
+ ret i32 %2
+}
+
+; BFIs that require the same number of instruction to materialize the constant
+; as the original ORR are okay.
+; (The shifted BFI constant 0x5ad69 still has two non-zero 16-bit chunks,
+; matching the two chunks of the original ORR constant 0xb5ad2.)
+; CHECK-LABEL: @test7
+; CHECK: movz [[REG:w[0-9]+]], #5, lsl #16
+; CHECK: movk [[REG]], #44393
+; CHECK: bfi w0, [[REG]], #1, #19
+define i32 @test7(i32 %a) {
+ %1 = and i32 %a, 4293918721 ; 0xfff00001
+ %2 = or i32 %1, 744146 ; 0x000b5ad2
+ ret i32 %2
+}
+
+; BFIs that require more instructions to materialize the constant as compared
+; to the original ORR are not okay. In this case we would be replacing the
+; 'and' with a 'movk', which would decrease ILP while using the same number of
+; instructions.
+; Use CHECK-LABEL (not a bare CHECK) so the matches below are scoped to this
+; function, consistent with the other tests in this file.
+; CHECK-LABEL: @test8
+; CHECK: movz [[REG2:x[0-9]+]], #36694, lsl #32
+; CHECK: and [[REG1:x[0-9]+]], x0, #0xff000000000000ff
+; CHECK: movk [[REG2]], #31059, lsl #16
+; CHECK: orr x0, [[REG1]], [[REG2]]
+define i64 @test8(i64 %a) {
+ %1 = and i64 %a, -72057594037927681 ; 0xff000000000000ff
+ %2 = or i64 %1, 157601565442048 ; 0x00008f5679530000
+ ret i64 %2
+}
More information about the llvm-commits
mailing list