[llvm] [Aarch64] Materialize immediates with 64-bit ORR + EOR if shorter (PR #68287)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 8 03:14:56 PDT 2023
================
@@ -362,6 +362,106 @@ static bool tryAndOfLogicalImmediates(uint64_t UImm,
return false;
}
+// Check whether the constant can be represented by exclusive-or of two 64-bit
+// logical immediates. If so, materialize it with an ORR instruction followed
+// by an EOR instruction.
+//
+// This encoding allows all remaining repeated byte patterns, and many repeated
+// 16-bit values, to be encoded without needing four instructions. It can also
+// represent some irregular bitmasks (although those would mostly only need
+// three instructions otherwise).
+static bool tryEorOfLogicalImmediates(uint64_t Imm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ // Determine the larger repetition size of the two possible logical
+ // immediates, by finding the repetition size of Imm.
+ unsigned BigSize = 64;
+
+ do {
+ BigSize /= 2;
+ uint64_t Mask = (1ULL << BigSize) - 1;
+
+ if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
+ BigSize *= 2;
+ break;
+ }
+ } while (BigSize > 2);
+
+ uint64_t BigMask = ((uint64_t)-1LL) >> (64 - BigSize);
+
+ // Find the last bit of each run of ones, circularly. For runs which wrap
+ // around from bit 0 to bit 63, this is the bit before the most-significant
+ // zero, otherwise it is the least-significant bit in the run of ones.
+ uint64_t RunStarts = Imm & ~llvm::rotl<uint64_t>(Imm, 1);
+
+ // Find the smaller repetition size of the two possible logical immediates by
+ // counting the number of runs of one-bits within the BigSize-bit value. Both
+ // sizes may be the same. The EOR may add one or subtract one from the
+ // power-of-two count that can be represented by a logical immediate, or it
+ // may be left unchanged.
+ int RunsPerBigChunk = llvm::popcount(RunStarts & BigMask);
+
+ static const int8_t BigToSmallSizeTable[32] = {
+ -1, -1, 0, 1, 2, 2, -1, 3, 3, 3, -1, -1, -1, -1, -1, 4,
+ 4, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5,
+ };
+
+ int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];
+
+ // Early-exit if the big chunk couldn't be a power-of-two number of runs
+ // EORed with another single run.
+ if (BigToSmallShift == -1)
+ return false;
+
+ unsigned SmallSize = BigSize >> BigToSmallShift;
+
+ // 64-bit values with a bit set every (1 << index) bits.
+ static const uint64_t RepeatedOnesTable[] = {
+ 0xffffffffffffffff, 0x5555555555555555, 0x1111111111111111,
+ 0x0101010101010101, 0x0001000100010001, 0x0000000100000001,
+ 0x0000000000000001,
+ };
+
+ // This RepeatedOnesTable lookup is a faster implementation of the division
+ // 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
+ // dividing the 64-bit value into fields of width SmallSize, and placing a
+ // one in the least significant bit of each field.
+ uint64_t SmallOnes = RepeatedOnesTable[llvm::countr_zero(SmallSize)];
+
+ // Now we try to find the number of ones in each of the smaller repetitions,
+ // by looking at runs of ones in Imm. This can take three attempts, as the
+ // EOR may have changed the length of the first two runs we find.
----------------
davemgreen wrote:
Is this 3 because any of the first 3 runs of 1s maybe be, lets say, "corrupted" by the xor constant. But by the time we have looked at 3 runs we will have found BigImm^SmallImm (or ~BigImm^~SmallImm), or it won't be valid?
https://github.com/llvm/llvm-project/pull/68287
More information about the llvm-commits
mailing list