[llvm] [InstCombine] Improve bitfield addition (PR #77184)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 21 05:35:25 PDT 2024
================
@@ -3485,6 +3485,252 @@ static Value *foldOrOfInversions(BinaryOperator &I,
return nullptr;
}
+struct BitFieldAddBitMask {
+ const APInt *Lower;
+ const APInt *Upper;
+};
+struct BitFieldOptBitMask {
+ const APInt *Lower;
+ const APInt *Upper;
+ const APInt *New;
+};
+struct BitFieldAddInfo {
+ Value *X;
+ Value *Y;
+ bool opt;
+ union {
+ BitFieldAddBitMask AddMask;
+ BitFieldOptBitMask OptMask;
+ };
+};
+
+/// A bitfield operation consists of the following three steps:
+/// 1. extracting the bits
+/// 2. performing operations
+/// 3. eliminating the bits beyond the specified range
+///
+/// Depending on the location of the bitfield on which we want to perform
+/// the operation, all or only some of these steps are performed.
+///
+/// Consider:
+/// %narrow = add i8 %y, %x
+/// %bf.value = and i8 %narrow, 7
+/// %bf.lshr = and i8 %x, 24
+/// %bf.lshr1244 = add i8 %bf.lshr, %y
+/// %bf.shl = and i8 %bf.lshr1244, 24
+/// %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+///
+/// This example performs the bitfield operation on the low field (mask 7,
+/// bits 0-2) first and on the next field (mask 24, bits 3-4) second. The
+/// first operation skips step 1: extraction is unnecessary because the field
+/// sits at the bottom of the bitfield. The second operation goes through all
+/// three steps described above.
+///
+/// After the operation for each bitfield is completed, all bits are collected
+/// through the `or disjoint` operation and the result is returned.
+///
+/// The optimization opportunity is to reduce these three steps per bitfield.
+/// The optimized form is shown below with constant operands, which makes it
+/// easier to follow.
+///
+/// Consider:
+/// (first) (second) (final)
+/// ????????(x) ????????(x) 00000???
+/// + 00000001 & 00011000 | 000??000
+/// ---------- ---------- ----------
+/// 0000???? 000??000 = 000?????
+/// & 00000111 + 00001000
+/// = 00000??? ----------
+/// 00???000
+/// & 00011000
+/// ----------
+/// = 000??000
+///
+/// Optimized:
+/// (first) (second) (final)
+/// 000????? (x) 000????? (x) 000????? (x&11) + 9
+/// & 00001011 & 00010100 ^ 000?0?00 (x&20)
+/// ---------- ---------- ----------
+/// 0000?0?? (x & 11) = 000?0?00 = 000?????
+/// + 00001001
+/// ----------
+/// = 000????? (x&11) + 9
+///
+/// 1. Extract each bitfield, excluding its highest bit.
+/// 2. Add the sum of all values to be added to each bitfield.
+/// 3. Extract the highest bit of each bitfield.
+/// 4. XOR the results of steps 2 and 3.
+///
+/// The key step is step 4. The pre-extracted highest bit of each bitfield is
+/// XORed with the value produced by the addition. This yields the correct
+/// addition result for the highest bit of each bitfield without a separate
+/// step to remove the bit that overflowed out of the field.
+static Value *foldBitFieldArithmetic(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ auto *Disjoint = dyn_cast<PossiblyDisjointInst>(&I);
+ if (!Disjoint || !Disjoint->isDisjoint())
+ return nullptr;
+
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+
+ // If the operands of the bitfield operation are constants, their sum is
+ // computed and returned. If they are not constants, the operand is
+ // returned. If this is not a bitfield operation, null is returned.
+ auto AccumulateY = [&](Value *LoY, Value *UpY, const APInt LoMask,
+ const APInt UpMask) -> Value * {
+ Value *Y = nullptr;
+ auto *CLoY = dyn_cast_or_null<Constant>(LoY);
+ auto *CUpY = dyn_cast_or_null<Constant>(UpY);
+ // If one operand is a constant, the other must be a constant too.
+ if ((CLoY == nullptr) ^ (CUpY == nullptr))
+ return nullptr;
+
+ if (CLoY && CUpY) {
+ APInt IUpY = CUpY->getUniqueInteger();
+ APInt ILoY = CLoY->getUniqueInteger();
+ // Each operand's bits must lie within the mask of its own field.
+ if (!(IUpY.isSubsetOf(UpMask) && ILoY.isSubsetOf(LoMask)))
+ return nullptr;
+ Y = ConstantInt::get(CLoY->getType(), ILoY + IUpY);
+ } else if (LoY == UpY) {
+ Y = LoY;
+ }
+
+ return Y;
+ };
+
+ // Check whether this `or disjoint` instruction is a bitfield operation.
+ // If it is, the information needed to optimize it is extracted and
+ // returned as a BitFieldAddInfo.
+ auto MatchBitFieldAdd =
+ [&](BinaryOperator &I) -> std::optional<BitFieldAddInfo> {
+ const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr;
+ Value *X, *Y, *UpY;
+
+ // The bitfield has more than two members.
+ // ((X&UpMask)+UpY)&UpMask2 | (X&UpMask)+UpY
+ auto BitFieldAddUpper = m_CombineOr(
+ m_And(m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)),
+ m_APInt(UpMask2)),
+ m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)));
+ // Bitfield has more than 2 member but bottom bitfield
+ // BitFieldAddUpper | (X+Y)&LoMask
+ auto BitFieldAdd =
+ m_c_Or(BitFieldAddUpper,
+ m_And(m_c_Add(m_Deferred(X), m_Value(Y)), m_APInt(LoMask)));
+ // When the bitfield has only two members:
+ // (X+Y)&LoMask | (X+UpY)&UpMask
+ auto BitFieldAddIC =
+ m_c_Or(m_And(m_c_Add(m_Value(X), m_Value(Y)), m_APInt(LoMask)),
+ m_And(m_c_Add(m_Deferred(X), m_Value(UpY)), m_APInt(UpMask)));
+ // When `Or optimized-bitfield, BitFieldAddUpper` matched
+ // OptUpMask = the highest bit of each bitfield
+ // OptLoMask = all bits of each bitfield except the highest bit
+ // BitFieldAddUpper | ((X&OptLoMask)+Y) ^ ((X&OptUpMask))
+ auto OptBitFieldAdd = m_c_Or(
+ m_c_Xor(m_CombineOr(
+ // When Y is not the constant.
+ m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)),
+ m_And(m_Value(Y), m_APInt(OptLoMask))),
+ // When Y is Constant, it can be accumulated.
+ m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))),
+ // If Y is a constant, X^Y&OptUpMask can be pre-computed and
+ // OptUpMask is its result.
+ m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
+ m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
+ m_APInt(OptUpMask)))),
----------------
ParkHanbum wrote:
Yes, but I made a mistake and fixed it
https://github.com/llvm/llvm-project/pull/77184
More information about the llvm-commits
mailing list