[llvm] [LoopIdiomRecognizer] Implement CRC recognition (PR #79295)
Joe Faulls via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 20 10:30:30 PDT 2024
https://github.com/joe-img updated https://github.com/llvm/llvm-project/pull/79295
>From de44cae8d94920d3b7340fb0b541512845d121b7 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:36:04 +0000
Subject: [PATCH 01/10] [LoopIdiomRecognize] Implement function to extract CRC
data from loops
This will check to see if a loop looks like CRC, not necessarily
guaranteeing that it is CRC.
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 244 +++++++++++++++++-
1 file changed, 235 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3721564890ddb4..c21b6e6fe29568 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -135,6 +135,9 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
"with -Os/-Oz"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> CRCRecognize("recognize-crc", cl::desc("CRC RECOGNIZE"),
+ cl::init(false), cl::Hidden);
+
namespace {
class LoopIdiomRecognize {
@@ -186,6 +189,15 @@ class LoopIdiomRecognize {
// handling.
};
+ struct CRCInfo {
+ Value *CRCInput; // CRC value entering the loop from outside it.
+ Value *CRCOutput; // In-loop instruction feeding the exit block's PHI.
+ Value *DataInput; // Second outside input to a header PHI, or null.
+ uint64_t Width; // Loop trip count (looksLikeCRC only accepts 8).
+ uint64_t Polynomial; // XOR constant; bit-reversed when BitReversed.
+ bool BitReversed; // True when the loop shifts right (LShr).
+ };
+
/// \name Countable Loop Idiom Handling
/// @{
@@ -242,6 +254,8 @@ class LoopIdiomRecognize {
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
+ std::optional<CRCInfo> looksLikeCRC(const SCEV *BECount);
+ bool recognizeCRC(const SCEV *BECount);
/// @}
};
@@ -298,13 +312,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
- HasMemset = TLI->has(LibFunc_memset);
- HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
- HasMemcpy = TLI->has(LibFunc_memcpy);
-
- if (HasMemset || HasMemsetPattern || HasMemcpy)
- if (SE->hasLoopInvariantBackedgeTakenCount(L))
- return runOnCountableLoop();
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop();
return runOnNoncountableLoop();
}
@@ -329,6 +338,17 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
<< "] Countable Loop %" << CurLoop->getHeader()->getName()
<< "\n");
+ bool MadeChange = false;
+ if (CRCRecognize)
+ MadeChange |= recognizeCRC(BECount);
+
+ HasMemset = TLI->has(LibFunc_memset);
+ HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
+ HasMemcpy = TLI->has(LibFunc_memcpy);
+
+ if (!(HasMemset || HasMemsetPattern || HasMemcpy))
+ return MadeChange;
+
// The following transforms hoist stores/memsets into the loop pre-header.
// Give up if the loop has instructions that may throw.
SimpleLoopSafetyInfo SafetyInfo;
@@ -336,8 +356,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
if (SafetyInfo.anyBlockMayThrow())
return false;
- bool MadeChange = false;
-
// Scan all the blocks in the loop that are not in subloops.
for (auto *BB : CurLoop->getBlocks()) {
// Ignore blocks in subloops.
@@ -2868,3 +2886,211 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
++NumShiftUntilZero;
return MadeChange;
}
+
+static uint64_t reverseBits(uint64_t Num, unsigned NumBits) {
+  // Mirror the low NumBits bits of Num: bit k moves to bit NumBits - 1 - k.
+  // Any bits of Num at or above position NumBits are discarded.
+  uint64_t Reversed = 0;
+  for (unsigned Dst = NumBits; Dst-- > 0; Num >>= 1)
+    Reversed |= (Num & 1) << Dst;
+  return Reversed;
+}
+
+bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
+ // Step one: Check if the loop looks like crc, and extract some useful
+ // information for us to check
+ std::optional<CRCInfo> MaybeCRC = looksLikeCRC(BECount);
+ if (!MaybeCRC)
+ return false; // The structural screen rejected the loop.
+ CRCInfo CRC = *MaybeCRC;
+
+ uint64_t CRCSize = CRC.CRCInput->getType()->getScalarSizeInBits(); // Bit width of the CRC input's type.
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Found potential CRCLoop "
+ << *CurLoop << "\n"
+ << "Input CRC: " << *CRC.CRCInput << "\n"
+ << "Output CRC: " << *CRC.CRCOutput << "\n"
+ << "GeneratorPolynomial: " << CRC.Polynomial << "\n"
+ << "CRC Size: " << CRCSize << "\n"
+ << "CRC Width: " << CRC.Width << "\n"
+ << "Reversed: " << CRC.BitReversed << "\n");
+ if (CRC.DataInput) { // DataInput stays null when no second loop input was found.
+ LLVM_DEBUG(dbgs() << "Data Input: " << *CRC.DataInput << "\n"
+ << "Data Size: "
+ << CRC.DataInput->getType()->getScalarSizeInBits()
+ << "\n");
+ }
+
+ return false;
+}
+
+/// Cheap structural screen for a bit-at-a-time CRC loop. A match only means
+/// the loop is CRC-shaped; the caller still has to prove it. Returns the
+/// extracted inputs, output, polynomial and shift direction, or std::nullopt
+/// as soon as one of the checks below fails.
+std::optional<LoopIdiomRecognize::CRCInfo>
+LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
+  // Initial checks to see if this loop looks like CRC:
+  // - Inner most loop
+  // - One block
+  // - One exit
+  // - Iteration count is 8
+  if (!CurLoop->isInnermost())
+    return std::nullopt;
+
+  // Since we are far enough in the optimization pipeline that small branches
+  // will have been folded into Select instructions, if we have branches we are
+  // unlikely to be CRC. To reduce complexity, only consider single-block loops
+  // for CRC recognition
+  if (CurLoop->getBlocks().size() > 1) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Loops with more than one"
+                      << " block are unsupported\n");
+    return std::nullopt;
+  }
+
+  // Ensure one exit block
+  const BasicBlock *ExitBlock = CurLoop->getExitBlock();
+  if (!ExitBlock)
+    return std::nullopt;
+
+  // Check iteration count is 8
+  const SCEV *TripCountSCEV =
+      SE->getTripCountFromExitCount(BECount, BECount->getType(), CurLoop);
+  const auto *TripCountSCEVConst = dyn_cast<SCEVConstant>(TripCountSCEV);
+  if (!TripCountSCEVConst)
+    return std::nullopt;
+  APInt TripCount = TripCountSCEVConst->getAPInt();
+  // Only support one byte CRC loops. Loops with tripcount 16 or 32 can also be
+  // CRC, but this is currently unsupported
+  if (TripCount != 8)
+    return std::nullopt;
+
+  // Ensure only one value that is live across the loop boundary, and track the
+  // operations on this value. This should include:
+  // 1) A phi with an initial value outside the loop
+  // 2) Shift operation
+  // 3) ICMP operation
+
+  // Ensure only one value is live across the loop boundary. LCSSA ensures any
+  // live values are captured in a PHI of the exit block.
+  Instruction *LoopOutput = nullptr;
+  for (const PHINode &ExitPhi : ExitBlock->phis()) {
+    for (const Use &IncomingUse : ExitPhi.incoming_values()) {
+      auto *IncomingInst = dyn_cast<Instruction>(IncomingUse.get());
+      if (!IncomingInst || !CurLoop->contains(IncomingInst))
+        continue;
+      if (LoopOutput)
+        return std::nullopt;
+      LoopOutput = IncomingInst;
+    }
+  }
+
+  if (!LoopOutput)
+    return std::nullopt;
+
+  // Queue every operand of I that is itself an instruction.
+  auto AddAllInstOps = [](Instruction *I,
+                          SmallVectorImpl<Instruction *> &Worklist) {
+    for (Use &Op : I->operands())
+      if (auto *OpInst = dyn_cast<Instruction>(Op.get()))
+        Worklist.push_back(OpInst);
+  };
+
+  // Follow this value in the loop
+  SmallVector<Instruction *, 4> Worklist;
+  SmallPtrSet<Instruction *, 4> Visited;
+  bool FoundIcmp = false;
+  BinaryOperator *CRCShift = nullptr;
+  ConstantInt *GeneratorPolynomial = nullptr;
+  Value *CRCInput = nullptr;
+  Worklist.push_back(LoopOutput);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (!Visited.insert(I).second)
+      continue;
+    if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+      FoundIcmp |= isa<ICmpInst>(Select->getCondition());
+      AddAllInstOps(Select, Worklist);
+    } else if (isa<ICmpInst>(I)) {
+      // Instead of tracking the condition and working out if it's based on
+      // MSB of crc/data, just greedily assume it will be and check later.
+      FoundIcmp = true;
+    } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I)) {
+      switch (BinOp->getOpcode()) {
+      default:
+        break;
+      case Instruction::Shl:
+      case Instruction::LShr: {
+        // This shift could be the data or the crc. Either way, the RHS should
+        // be constant one. isOne() is safe for constants of any width.
+        Instruction *ShLHS = dyn_cast<Instruction>(BinOp->getOperand(0));
+        ConstantInt *ShRHS = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+        if (!ShRHS || !ShLHS || !ShRHS->isOne())
+          return std::nullopt;
+        CRCShift = BinOp;
+        Worklist.push_back(ShLHS);
+        break;
+      }
+      case Instruction::Xor: {
+        Value *XorRHS = BinOp->getOperand(1);
+        if (ConstantInt *RHSConst = dyn_cast<ConstantInt>(XorRHS))
+          GeneratorPolynomial = RHSConst;
+        AddAllInstOps(BinOp, Worklist);
+        break;
+      }
+      }
+    } else if (PHINode *PHI = dyn_cast<PHINode>(I)) {
+      for (BasicBlock *IncomingBlock : PHI->blocks()) {
+        Value *IncomingValue = PHI->getIncomingValueForBlock(IncomingBlock);
+        if (!CurLoop->contains(IncomingBlock))
+          CRCInput = IncomingValue;
+        else if (auto *IncomingI = dyn_cast<Instruction>(IncomingValue))
+          Worklist.push_back(IncomingI);
+      }
+    }
+  }
+
+  // FoundIcmp is part of the shape check: without a compare the loop cannot
+  // be the conditional shift-and-xor described above.
+  if (!(FoundIcmp && CRCShift && GeneratorPolynomial && CRCInput)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Does not look like CRC\n");
+    return std::nullopt;
+  }
+
+  // The crc loop will have either one or two inputs depending on whether the
+  // data is xor'd inside or outside the loop. Assume any additional inputs that
+  // isn't our crc input is the data.
+  Value *DataInput = nullptr;
+  PHINode *InductionPhi = CurLoop->getInductionVariable(*SE);
+  for (const PHINode &EntryPhi : CurLoop->getHeader()->phis()) {
+    if (&EntryPhi == InductionPhi)
+      continue;
+    for (BasicBlock *BB : EntryPhi.blocks()) {
+      if (CurLoop->contains(BB))
+        continue;
+      Value *IncomingVal = EntryPhi.getIncomingValueForBlock(BB);
+      if (IncomingVal == CRCInput)
+        continue;
+      // Only allow exactly one additional input to the loop.
+      if (DataInput)
+        return std::nullopt;
+      DataInput = IncomingVal;
+    }
+  }
+
+  // CRCInfo keeps the polynomial in a uint64_t and reverseBits() shifts a
+  // uint64_t by up to CRCBits - 1, so anything wider than 64 bits is rejected.
+  const unsigned CRCBits = CRCInput->getType()->getScalarSizeInBits();
+  if (CRCBits > 64 || GeneratorPolynomial->getBitWidth() > 64)
+    return std::nullopt;
+
+  bool Reversed = CRCShift->getOpcode() == Instruction::LShr;
+  uint64_t Polynomial = GeneratorPolynomial->getZExtValue();
+  if (Reversed)
+    Polynomial = reverseBits(Polynomial, CRCBits);
+
+  CRCInfo CRC = {CRCInput, LoopOutput, DataInput, TripCount.getZExtValue(),
+                 Polynomial, Reversed};
+
+  return std::optional<CRCInfo>{CRC};
+}
>From a3aaf1a78bab5f58cd390830421bac79e7f38e07 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:59:09 +0000
Subject: [PATCH 02/10] [LoopIdiomRecognize] Implement ValueBits class
This is a representation of a value's bits in terms of references to
other values' bits, or 1/0 if the bit is known. This allows symbolic
execution of bitwise instructions without knowing the exact values.
Example:
LLVM IR Value i8 %x:
[%x[7], %x[6], %x[5], %x[4], %x[3], %x[2], %x[1], %x[0]]
%shr = lshr i8 %x, 2
[ 0, 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2]]
%shl = shl i8 %shr, 1
[ 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2], 0]
%xor = xor i8 %shl, 0xb
[ 0, %x[7], %x[6], %x[5], %x[4]^1, %x[3], %x[2]^1, 1]
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 236 ++++++++++++++++++
1 file changed, 236 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c21b6e6fe29568..f3d8b04130ffa2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -2896,6 +2896,242 @@ static uint64_t reverseBits(uint64_t Num, unsigned NumBits) {
return Reversed;
}
+class ValueBits {
+  // This is a representation of a value's bits in terms of references to
+  // other values' bits, or 1/0 if the bit is known. This allows symbolic
+  // execution of bitwise instructions without knowing the exact values.
+  //
+  // Example:
+  //
+  // LLVM IR Value i8 %x:
+  // [%x[7], %x[6], %x[5], %x[4], %x[3], %x[2], %x[1], %x[0]]
+  //
+  // %shr = lshr i8 %x, 2
+  // [ 0, 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2]]
+  //
+  // %shl = shl i8 %shr, 1
+  // [ 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2], 0]
+  //
+  // %xor = xor i8 %shl, 0xb
+  // [ 0, %x[7], %x[6], %x[5], %x[4]^1, %x[3], %x[2]^1, 1]
+public:
+  class ValueBit {
+  public:
+    enum BitType { ONE, ZERO, REF, XOR };
+
+  private:
+    BitType _Type;
+    std::pair<Value *, uint64_t> _BitRef;
+    ValueBit *_LHS = nullptr;
+    ValueBit *_RHS = nullptr;
+
+    ValueBit(BitType Type) : _Type(Type) {}
+    ValueBit(BitType Type, std::pair<Value *, uint64_t> BitRef)
+        : _Type(Type), _BitRef(BitRef) {}
+    ValueBit(BitType Type, ValueBit *LHS, ValueBit *RHS)
+        : _Type(Type), _LHS(LHS), _RHS(RHS) {}
+
+  public:
+    static ValueBit *CreateOneBit() { return new ValueBit(BitType::ONE); }
+    static ValueBit *CreateZeroBit() { return new ValueBit(BitType::ZERO); }
+    static ValueBit *CreateRefBit(Value *Ref, uint64_t Offset) {
+      return new ValueBit(BitType::REF, std::make_pair(Ref, Offset));
+    }
+    static ValueBit *CreateXORBit(ValueBit *LHS, ValueBit *RHS) {
+      return new ValueBit(BitType::XOR, LHS, RHS);
+    }
+    inline BitType getType() { return _Type; }
+    bool equals(ValueBit *RHS) {
+      if (_Type != RHS->getType())
+        return false;
+      switch (_Type) {
+      case BitType::ONE:
+      case BitType::ZERO:
+        return true;
+      case BitType::REF:
+        return _BitRef == RHS->_BitRef;
+      case BitType::XOR:
+        return (_LHS->equals(RHS->_LHS) && _RHS->equals(RHS->_RHS)) ||
+               (_LHS->equals(RHS->_RHS) && _RHS->equals(RHS->_LHS));
+      }
+      return false;
+    }
+
+    void print(raw_ostream &OS) {
+      switch (_Type) {
+      case BitType::ONE:
+        OS << "1";
+        break;
+      case BitType::ZERO:
+        OS << "0";
+        break;
+      case BitType::REF:
+        OS << _BitRef.first->getNameOrAsOperand() << "[" << _BitRef.second
+           << "]";
+        break;
+      case BitType::XOR:
+        _LHS->print(OS);
+        OS << "^";
+        _RHS->print(OS);
+        break;
+      }
+    }
+  };
+
+private:
+  uint64_t Size = 0;
+  std::vector<ValueBit *> Bits; // Bits[0] is the least significant bit.
+
+  // In-place helpers behind the static operations below. Bits[0] is the
+  // least significant bit, so shifting left inserts zeros at the front.
+  virtual void _Shl(uint64_t N) {
+    for (; N > 0; N--) {
+      Bits.insert(Bits.begin(), ValueBit::CreateZeroBit());
+      Bits.pop_back();
+    }
+  }
+  virtual void _LShr(uint64_t N) {
+    for (; N > 0; N--) {
+      Bits.insert(Bits.end(), ValueBit::CreateZeroBit());
+      Bits.erase(Bits.begin());
+    }
+  }
+  virtual void _Xor(ValueBits *RHS) {
+    assert(Size == RHS->getSize());
+    for (unsigned I = 0; I < Size; I++) {
+      // Slots are replaced by index: erase() would invalidate an iterator
+      // to the slot, and reading through it afterwards sees the wrong bit.
+      ValueBit *LHSBit = Bits[I];
+      ValueBit *RHSBit = RHS->getBit(I);
+      const ValueBit::BitType LHSType = LHSBit->getType();
+      const ValueBit::BitType RHSType = RHSBit->getType();
+      if (RHSType == ValueBit::BitType::ZERO)
+        continue; // x ^ 0 == x
+      if (RHSType == ValueBit::BitType::ONE) {
+        if (LHSType == ValueBit::BitType::ZERO)
+          Bits[I] = ValueBit::CreateOneBit(); // 0 ^ 1 == 1
+        else if (LHSType == ValueBit::BitType::ONE)
+          Bits[I] = ValueBit::CreateZeroBit(); // 1 ^ 1 == 0
+        else
+          Bits[I] = ValueBit::CreateXORBit(LHSBit, ValueBit::CreateOneBit());
+      } else if (LHSType == ValueBit::BitType::ZERO) {
+        Bits[I] = new ValueBit(*RHSBit); // 0 ^ y == y
+      } else {
+        Bits[I] = ValueBit::CreateXORBit(LHSBit, RHSBit);
+      }
+    }
+  }
+  virtual void _ZExt(uint64_t ToSize) {
+    assert(ToSize > Size);
+    for (uint64_t I = 0; I < ToSize - Size; I++)
+      Bits.push_back(ValueBit::CreateZeroBit());
+    Size = ToSize;
+  }
+  virtual void _Trunc(uint64_t ToSize) {
+    assert(ToSize < Size);
+    Bits.erase(Bits.begin() + ToSize, Bits.end());
+    Size = ToSize;
+  }
+  virtual void _And(uint64_t RHS) {
+    // Clear every bit whose mask bit is 0. Slots are assigned by index so no
+    // iterator is used after erase() has invalidated it.
+    for (unsigned I = 0; I < Size; I++) {
+      if (!(RHS & 1))
+        Bits[I] = ValueBit::CreateZeroBit();
+      RHS >>= 1;
+    }
+  }
+
+protected:
+  ValueBits() {}
+
+public:
+  ValueBits(Value *InitialVal, uint64_t BitLength) : Size(BitLength) {
+    for (unsigned i = 0; i < BitLength; i++)
+      Bits.push_back(ValueBit::CreateRefBit(InitialVal, i));
+  }
+  ValueBits(uint64_t InitialVal, uint64_t BitLength) : Size(BitLength) {
+    for (unsigned i = 0; i < BitLength; i++, InitialVal >>= 1)
+      Bits.push_back((InitialVal & 0x1) ? ValueBit::CreateOneBit()
+                                        : ValueBit::CreateZeroBit());
+  }
+  // ValueBits is copied via copyBits() and used polymorphically, so keep the
+  // copy constructor explicit and make the destructor virtual.
+  ValueBits(const ValueBits &) = default;
+  virtual ~ValueBits() = default;
+  uint64_t getSize() { return Size; }
+  ValueBit *getBit(unsigned i) { return Bits[i]; }
+
+  virtual ValueBits *copyBits() { return new ValueBits(*this); }
+
+  // Non-mutating operations: each one copies LHS with copyBits(), applies the
+  // in-place helper to the copy and returns the newly allocated copy.
+  static ValueBits *Shl(ValueBits *LHS, uint64_t N) {
+    ValueBits *Shifted = LHS->copyBits();
+    Shifted->_Shl(N);
+    return Shifted;
+  }
+  static ValueBits *LShr(ValueBits *LHS, uint64_t N) {
+    ValueBits *Shifted = LHS->copyBits();
+    Shifted->_LShr(N);
+    return Shifted;
+  }
+  static ValueBits *Xor(ValueBits *LHS, ValueBits *RHS) {
+    ValueBits *Xord = LHS->copyBits();
+    Xord->_Xor(RHS);
+    return Xord;
+  }
+  static ValueBits *ZExt(ValueBits *LHS, uint64_t ToSize) {
+    ValueBits *Zexted = LHS->copyBits();
+    Zexted->_ZExt(ToSize);
+    return Zexted;
+  }
+  static ValueBits *Trunc(ValueBits *LHS, uint64_t N) {
+    ValueBits *Trunced = LHS->copyBits();
+    Trunced->_Trunc(N);
+    return Trunced;
+  }
+  static ValueBits *And(ValueBits *LHS, uint64_t RHS) {
+    ValueBits *Anded = LHS->copyBits();
+    Anded->_And(RHS);
+    return Anded;
+  }
+
+  virtual bool isPredicated() { return false; }
+
+  virtual bool equals(ValueBits *RHS) {
+    if (Size != RHS->getSize())
+      return false;
+
+    for (unsigned I = 0; I < Size; I++)
+      if (!getBit(I)->equals(RHS->getBit(I)))
+        return false;
+
+    return true;
+  }
+
+  virtual void print(raw_ostream &OS) {
+    assert(Size != 0);
+    OS << "[";
+    Bits[Size - 1]->print(OS);
+    for (int i = Size - 2; i >= 0; i--) {
+      OS << " | ";
+      Bits[i]->print(OS);
+    }
+    OS << "]\n";
+  }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, ValueBits &VBS) {
+ VBS.print(OS); // print() is virtual, so a subclass may supply the format.
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, ValueBits::ValueBit &VB) {
+ VB.print(OS);
+ return OS;
+}
+
bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// Step one: Check if the loop looks like crc, and extract some useful
// information for us to check
>From 8b16af4d22052200173d4027e550a2388a1dc554 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:59:36 +0000
Subject: [PATCH 03/10] [LoopIdiomRecognize] Implement PredicatedValueBits
These would be representative of select or phi instructions where the
bits would depend on an icmp.
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 50 +++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f3d8b04130ffa2..050a415ede38e2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3131,6 +3131,56 @@ inline raw_ostream &operator<<(raw_ostream &OS, ValueBits::ValueBit &VB) {
VB.print(OS);
return OS;
}
+class PredicatedValueBits : public ValueBits {
+ // Represents a select or phi whose bits depend on an icmp: the predicate is
+ // stored together with the ValueBits for its true and false outcomes.
+private:
+ ICmpInst *_Predicate;
+ ValueBits *_IfTrue;
+ ValueBits *_IfFalse;
+ // Each in-place helper below applies the operation to both outcomes.
+ void _Shl(uint64_t N) override {
+ _IfTrue = ValueBits::Shl(_IfTrue, N);
+ _IfFalse = ValueBits::Shl(_IfFalse, N);
+ }
+ void _LShr(uint64_t N) override {
+ _IfTrue = ValueBits::LShr(_IfTrue, N);
+ _IfFalse = ValueBits::LShr(_IfFalse, N);
+ }
+ void _ZExt(uint64_t N) override {
+ _IfTrue = ValueBits::ZExt(_IfTrue, N);
+ _IfFalse = ValueBits::ZExt(_IfFalse, N);
+ }
+ void _And(uint64_t N) override {
+ _IfTrue = ValueBits::And(_IfTrue, N);
+ _IfFalse = ValueBits::And(_IfFalse, N);
+ }
+ void _Xor(ValueBits *RHS) override {
+ _IfTrue = ValueBits::Xor(_IfTrue, RHS);
+ _IfFalse = ValueBits::Xor(_IfFalse, RHS);
+ }
+ void _Trunc(uint64_t N) override {
+ _IfTrue = ValueBits::Trunc(_IfTrue, N);
+ _IfFalse = ValueBits::Trunc(_IfFalse, N);
+ }
+
+public:
+ PredicatedValueBits(ICmpInst *Predicate, ValueBits *IfTrue,
+ ValueBits *IfFalse)
+ : _Predicate(Predicate), _IfTrue(IfTrue), _IfFalse(IfFalse) {}
+
+ ValueBits *copyBits() override { return new PredicatedValueBits(*this); }
+ bool isPredicated() override { return true; }
+ ValueBits *getIfTrue() { return _IfTrue; }
+ ValueBits *getIfFalse() { return _IfFalse; }
+ ICmpInst *getPredicate() { return _Predicate; }
+
+ virtual void print(raw_ostream &OS) override {
+ OS << "Predicate: " << *_Predicate << "\nIf True:\n"
+ << *_IfTrue << "If False:\n"
+ << *_IfFalse;
+ }
+};
bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// Step one: Check if the loop looks like crc, and extract some useful
>From a6839f0371f30e0d7df718a17cdf7acd18cd68bd Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:11:59 +0000
Subject: [PATCH 04/10] [LoopIdiomRecognize] Add function to symbolically
execute basic block
The result is a map between llvm Values and their bit representations as
ValueBits.
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 144 ++++++++++++++++++
1 file changed, 144 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 050a415ede38e2..531c328f681691 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -90,6 +90,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <map>
#include <utility>
#include <vector>
@@ -3182,6 +3183,119 @@ class PredicatedValueBits : public ValueBits {
}
};
+// Execute the instructions in a basic block whilst mapping out Values to
+// ValueBits. Returns false as soon as an instruction cannot be modelled (see
+// the individual cases); ValueMap may then be only partially filled.
+static bool symbolicallyExecute(BasicBlock *BB,
+                                std::map<Value *, ValueBits *> &ValueMap) {
+
+  auto getConstantOperand = [](Instruction *I, uint8_t Operand) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
+    if (!CI) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                        << " handle this operation with non-constant operand "
+                        << unsigned(Operand) << ":\n"
+                        << *I << "\n");
+    }
+    return CI;
+  };
+
+  // Bits for Val: the ValueMap entry if present, else constant bits (for a
+  // ConstantInt) or references to Val's own bits. Nothing is added to the map.
+  auto getOrCreateValueBits = [&ValueMap](Value *Val) {
+    auto Result = ValueMap.find(Val);
+    if (Result != ValueMap.end())
+      return Result->second;
+    const uint64_t NumBits = Val->getType()->getScalarSizeInBits();
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val))
+      return new ValueBits(CI->getSExtValue(), NumBits);
+    return new ValueBits(Val, NumBits);
+  };
+
+  for (Instruction &I : *BB) {
+    uint64_t BitSize = I.getType()->getScalarSizeInBits();
+    // Constants are modelled through 64-bit integers (getSExtValue() and the
+    // uint64_t ValueBits interfaces), so give up on anything wider.
+    for (const Use &Op : I.operands())
+      if (auto *OpCI = dyn_cast<ConstantInt>(Op.get()))
+        if (OpCI->getBitWidth() > 64)
+          return false;
+    switch (I.getOpcode()) {
+    case Instruction::PHI: {
+      PHINode *PHI = cast<PHINode>(&I);
+      const BasicBlock *IncomingBlock = nullptr;
+      for (const BasicBlock *Incoming : PHI->blocks()) {
+        if (Incoming == BB)
+          continue;
+        if (IncomingBlock) {
+          LLVM_DEBUG(dbgs()
+                     << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                     << " handle loop with multiple entries" << I << "\n");
+          return false;
+        }
+        IncomingBlock = Incoming;
+      }
+      if (!IncomingBlock)
+        return false;
+      ValueMap[&I] =
+          getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock));
+    } break;
+    case Instruction::Shl:
+    case Instruction::LShr: {
+      ConstantInt *CI = getConstantOperand(&I, 1);
+      if (!CI)
+        return false;
+      // Shifting by BitSize or more is poison; clamping keeps the bit-by-bit
+      // shift loop bounded and models such a result as all zeros.
+      uint64_t Amount = CI->getLimitedValue(BitSize);
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = I.getOpcode() == Instruction::Shl
+                         ? ValueBits::Shl(LHSBits, Amount)
+                         : ValueBits::LShr(LHSBits, Amount);
+    } break;
+    case Instruction::And: {
+      ConstantInt *CI = getConstantOperand(&I, 1);
+      if (!CI)
+        return false;
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::And(LHSBits, CI->getSExtValue());
+    } break;
+    case Instruction::Xor: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueBits *RHSBits = getOrCreateValueBits(I.getOperand(1));
+      ValueMap[&I] = ValueBits::Xor(LHSBits, RHSBits);
+    } break;
+    case Instruction::ZExt: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::ZExt(LHSBits, BitSize);
+    } break;
+    case Instruction::Trunc: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::Trunc(LHSBits, BitSize);
+    } break;
+    case Instruction::Select: {
+      SelectInst *Select = cast<SelectInst>(&I);
+      ICmpInst *Cond = dyn_cast<ICmpInst>(Select->getCondition());
+      if (!Cond) {
+        LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                          << " handle SelectInst with non-icmp condition: " << I
+                          << "\n");
+        return false;
+      }
+      ValueBits *IfTrue = getOrCreateValueBits(Select->getTrueValue());
+      ValueBits *IfFalse = getOrCreateValueBits(Select->getFalseValue());
+      ValueMap[&I] = new PredicatedValueBits(Cond, IfTrue, IfFalse);
+    } break;
+    default:
+      // If this instruction is not recognized, then just continue. This is
+      // okay because users of this will just reference it by value, which is
+      // conservative.
+      break;
+    }
+  }
+  return true;
+}
+
bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// Step one: Check if the loop looks like crc, and extract some useful
// information for us to check
@@ -3206,6 +3320,36 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
<< "\n");
}
+ // Symbolically execute one iteration of the loop to populate a map of
+ // Value's to their ValueBits, aka a representation of their bits in terms of
+ // 1's, 0's and references to other values' bits. If these match pre-computed
+ // crc values, then we can say it's doing crc.
+ std::map<Value *, ValueBits *> ValueMap;
+
+ if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
+ return false;
+
+ auto Result = ValueMap.find(CRC.CRCOutput);
+ if (Result == ValueMap.end()) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Did not find CRC output"
+ << " after symbolic execution\n");
+ return false;
+ }
+
+ ValueBits *CRCOutBits = Result->second;
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ << " CRCRegonize: ValueBits for output crc value:\n"
+ << *CRCOutBits);
+
+ // Check this value is predicated
+ if (!CRCOutBits->isPredicated()) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ << " CRCRegonize: Output CRC ValueBits is not"
+ << " predicated.\n");
+ return false;
+ }
+ PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
+
return false;
}
>From 002baf311271338d89fb565cf9803171574aa68d Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:15:44 +0000
Subject: [PATCH 05/10] [LoopIdiomRecognize] Check result of symbolic execution
matches CRC
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 151 ++++++++++++++++++
1 file changed, 151 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 531c328f681691..73714609025cc3 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3350,6 +3350,157 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
}
PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
+ // Need to check if the predicate is checking the MSB/LSB depending on
+ // whether this is bit reversed CRC
+ ICmpInst *ICmp = CRCOutBitsPred->getPredicate();
+ CmpInst::Predicate Pred = ICmp->getPredicate();
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize checking to see if " << *ICmp
+ << " is checking the "
+ << (CRC.BitReversed ? "LSB\n" : "MSB\n"));
+
+ // Firstly check the LHS is in our map, and RHS is a constant
+ ConstantInt *RHS = dyn_cast<ConstantInt>(ICmp->getOperand(1));
+ Result = ValueMap.find(ICmp->getOperand(0));
+ if (!RHS || (Result == ValueMap.end())) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ << " CRCRegonize: Cannot determine ICmp operands\n");
+ return false;
+ }
+ ValueBits *ICmpOp0Bits = Result->second;
+
+ // Now match the following cases
+ // (LSB): icmp [ne/eq] %mcrc, [1/0], where mcrc has LSB masked out
+ // (MSB): icmp [ne/eq] %mcrc, [1 << BitSize], where mcrc has MSB masked out
+ // (MSB): icmp [sgt/sge] %crc, [1/0]
+ // (MSB): icmp [slt/sle] %crc, [0/-1]
+ // And decide whether the check is checking for existence of 1 or 0
+ bool checkZero = false;
+ ValueBits::ValueBit *CheckBit = nullptr;
+ switch (Pred) {
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_EQ: {
+ // Check RHS is checking only one bit.
+ uint64_t RHSNum = RHS->getZExtValue();
+ uint64_t MSBNum = 1 << (ICmpOp0Bits->getSize() - 1);
+ // LSB if BitReversed, MSB otherwise.
+ if (!(CRC.BitReversed && RHSNum == 1) &&
+ !(!CRC.BitReversed && RHSNum == MSBNum) && RHSNum != 0) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ << " CRCRegonize: ICmp RHS is not checking [M/L]SB\n");
+ return false;
+ }
+ // Now to check if we already know all the other bits of the RHS are zero.
+ ValueBits AllZeroValueBits((uint64_t)0, ICmpOp0Bits->getSize());
+ ValueBits *CRCOutBitsMasked = nullptr;
+ if (CRC.BitReversed) {
+ // Masking out the LSB is equivalent to shifting right one if we're just
+ // comparing all the other bits are zero.
+ CRCOutBitsMasked = ValueBits::LShr(ICmpOp0Bits, 1);
+ CheckBit = ICmpOp0Bits->getBit(0);
+ } else {
+ // The CRC type might be larger than the data, so we can't shift left
+ // one. Mask instead.
+ uint64_t MSBMask = ~(1 << (CRC.Width - 1));
+ CRCOutBitsMasked = ValueBits::And(ICmpOp0Bits, MSBMask);
+ CheckBit = ICmpOp0Bits->getBit(CRC.Width - 1);
+ }
+ if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
+ LLVM_DEBUG(
+ dbgs() << DEBUG_TYPE
+ << " CRCRegonize: Cannot determine ICmp checks [M/L]SB\n");
+ return false;
+ }
+ checkZero = RHSNum == 0;
+ break;
+ }
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ checkZero = true;
+ [[fallthrough]];
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE: {
+ int64_t RHSNum = RHS->getSExtValue();
+ if (((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SGE) &&
+ RHSNum != 0) ||
+ ((Pred == CmpInst::ICMP_SLE) && RHSNum != -1) ||
+ ((Pred == CmpInst::ICMP_SGT) && RHSNum != 1) ||
+ ((Pred == CmpInst::ICMP_ULT) && RHSNum != (1 << (CRC.Width - 1))) ||
+ ((Pred == CmpInst::ICMP_ULE) && RHSNum != (1 << (CRC.Width - 1)) - 1)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ << " CRCRegonize: ICmp RHS is not checking MSB\n");
+ return false;
+ }
+ CheckBit = ICmpOp0Bits->getBit(CRCSize - 1);
+ break;
+ }
+ default:
+ return false;
+ }
+
+ // If there exists a Data input, ensure the check bit is crc^data.
+ ValueBits::ValueBit *RefCheckBit = nullptr;
+ uint64_t CRCCheckIdx = CRC.BitReversed ? 0 : CRCSize - 1;
+ ValueBits::ValueBit *CRCInputRefBit =
+ ValueBits::ValueBit::CreateRefBit(CRC.CRCInput, CRCCheckIdx);
+ if (CRC.DataInput) {
+ uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
+ uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
+ ValueBits::ValueBit *DataInputRefBit =
+ ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+ RefCheckBit =
+ ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+ } else {
+ RefCheckBit = CRCInputRefBit;
+ }
+
+ if (!RefCheckBit->equals(CheckBit)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE
+ << " CRCRegonize: Cannot verify check bit!\n"
+ << *RefCheckBit << "\n"
+ << *CheckBit << "\n");
+ return false;
+ }
+
+ ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
+ ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
+ if (checkZero)
+ std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
+
+ // Now construct ValueBits that would be the result of crc for one iteration.
+ // That is, a shift and then xor if [M/L]SB is 1.
+ ValueBits *CRCValueBits = nullptr;
+ Result = ValueMap.find(CRC.CRCInput);
+ if (Result == ValueMap.end()) {
+ CRCValueBits = new ValueBits(CRC.CRCInput, CRCSize);
+ } else {
+ CRCValueBits = Result->second;
+ }
+ uint64_t GeneratorPolynomial =
+ CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
+ ValueBits Polynomial(GeneratorPolynomial, CRCSize);
+
+ // Case where the MSB/LSB of the data is 0
+ ValueBits *IfZero = CRC.BitReversed ? ValueBits::LShr(CRCValueBits, 1)
+ : ValueBits::Shl(CRCValueBits, 1);
+
+ // Case where the MSB/LSB of the data is 1
+ ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
+
+ if (!IfZero->equals(CRCOutBitsIfZero)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+ << *IfZero << *CRCOutBitsPred->getIfFalse());
+ return false;
+ }
+ if (!IfOne->equals(CRCOutBitsIfOne)) {
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+ << *IfOne << *CRCOutBitsPred->getIfTrue());
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
+
return false;
}
>From 01868bb972a32705576c20652455409c489ddcc8 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:17:13 +0000
Subject: [PATCH 06/10] [LoopIdiomRecognize] Write lookup table based CRC for
one-byte data
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 96 ++++++++++++++++++-
1 file changed, 95 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 73714609025cc3..f20947daaed8d5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -91,6 +91,7 @@
#include <cassert>
#include <cstdint>
#include <map>
+#include <sstream>
#include <utility>
#include <vector>
@@ -257,6 +258,7 @@ class LoopIdiomRecognize {
bool recognizeShiftUntilZero();
std::optional<CRCInfo> looksLikeCRC(const SCEV *BECount);
bool recognizeCRC(const SCEV *BECount);
+ void writeTableBasedCRCOneByte(CRCInfo &CRC);
/// @}
};
@@ -3296,6 +3298,96 @@ static bool symbolicallyExecute(BasicBlock *BB,
return true;
}
+void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
+ BasicBlock *ExitBB = CurLoop->getExitBlock();
+ IRBuilder<> Builder(ExitBB);
+ Builder.SetInsertPoint(ExitBB->getFirstNonPHI());
+ Type *CRCType = CRC.CRCInput->getType();
+ uint64_t CRCSize = CRCType->getScalarSizeInBits();
+
+ // Construct the CRC table
+ uint64_t CRCTable[256];
+ uint64_t Polynomial = CRC.Polynomial;
+ uint64_t SB = CRC.BitReversed ? 0x1 : (0x1 << (CRCSize - 1));
+ if (CRC.BitReversed)
+ Polynomial = reverseBits(Polynomial, CRCSize);
+ for (uint64_t Dividend = 0; Dividend < 256; Dividend++) {
+ uint64_t CurByte = Dividend;
+ if (!CRC.BitReversed)
+ CurByte <<= CRCSize - 8;
+ for (uint8_t Bit = 0; Bit < 8; Bit++) {
+ if ((CurByte & SB) != 0) {
+ CurByte = CRC.BitReversed ? CurByte >> 1 : CurByte << 1;
+ CurByte = CurByte ^ Polynomial;
+ } else {
+ CurByte = CRC.BitReversed ? CurByte >> 1 : CurByte << 1;
+ }
+ }
+ CRCTable[Dividend] = CurByte;
+ }
+ // To construct a global data array, we need the raw data in bytes.
+ // The calculated table array is an array of 64bit values because we can't
+ // dynamically type it, so we need to truncate the values to the crc size
+ // to avoid padded zeros. Do this by allocating a byte array (of slightly more
+ // than we need to account for overflow) and copying the 64bit values across
+ // aligned correctly
+ uint64_t CRCNumBytes = CRCSize / 8;
+ char *CRCTableData = (char *)malloc(CRCNumBytes * 260);
+ for (int I = 0; I < 256; I++) {
+ *((uint64_t *)(CRCTableData + I * CRCNumBytes)) = CRCTable[I];
+ }
+
+ // Construct and add the table as a global variable
+ ArrayType *TableType = ArrayType::get(CRCType, 256);
+ Constant *ConstantArr = ConstantDataArray::getRaw(
+ StringRef(CRCTableData, CRCNumBytes * 256), 256, CRCType);
+ std::stringstream TableNameSS;
+ TableNameSS << "crctable.i" << CRCSize << "." << CRC.Polynomial;
+ if (CRC.BitReversed)
+ TableNameSS << ".reversed";
+ GlobalVariable *CRCTableGlobal = new GlobalVariable(
+ TableType, true, GlobalVariable::LinkageTypes::PrivateLinkage,
+ ConstantArr, TableNameSS.str());
+ ExitBB->getModule()->insertGlobalVariable(CRCTableGlobal);
+ free(CRCTableData);
+
+ // Construct the IR to load from this table
+ Value *CRCOffset = CRC.CRCInput;
+ if (CRCSize > 8) {
+ // Get the next byte into position and truncate
+ if (!CRC.BitReversed)
+ CRCOffset = Builder.CreateLShr(CRCOffset, CRCSize - 8);
+ CRCOffset = Builder.CreateTrunc(CRCOffset, Builder.getInt8Ty());
+ }
+ if (CRC.DataInput) {
+ // Data size can be more than 8 due to extending
+ Value *Data = CRC.DataInput;
+ if (CRC.DataInput->getType()->getScalarSizeInBits() > 8) {
+ Data = Builder.CreateTrunc(Data, Builder.getInt8Ty());
+ }
+ // Xor the data, offset into the table and load
+ CRCOffset = Builder.CreateXor(CRCOffset, Data);
+ }
+
+ CRCOffset = Builder.CreateZExt(CRCOffset, Builder.getInt32Ty());
+ Value *Gep = Builder.CreateInBoundsGEP(CRCType, CRCTableGlobal, {CRCOffset});
+ Value *CRCRes = Builder.CreateLoad(CRCType, Gep);
+ if (CRCSize > 8) {
+ // Shift out SB used for division and Xor the rest of the crc back in
+ Value *RestOfCRC = CRC.CRCInput;
+ if (CRC.BitReversed)
+ RestOfCRC = Builder.CreateLShr(CRC.CRCInput, 8);
+ else
+ RestOfCRC = Builder.CreateShl(CRC.CRCInput, 8);
+ CRCRes = Builder.CreateXor(RestOfCRC, CRCRes);
+ }
+ for (PHINode &ExitPhi : CurLoop->getExitBlock()->phis()) {
+ if (ExitPhi.getNumIncomingValues() == 1 &&
+ ExitPhi.getIncomingValue(0) == CRC.CRCOutput)
+ ExitPhi.replaceAllUsesWith(CRCRes);
+ }
+}
+
bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// Step one: Check if the loop looks like crc, and extract some useful
// information for us to check
@@ -3501,7 +3593,9 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
- return false;
+ writeTableBasedCRCOneByte(CRC);
+
+ return true;
}
std::optional<LoopIdiomRecognize::CRCInfo>
>From e8fe438ea23368b8726b1b59480fadb373d28925 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:23:57 +0000
Subject: [PATCH 07/10] [LoopIdiomRecognize] Add unit tests for CRC idiom
recognizer
---
llvm/test/Transforms/LoopIdiom/crc/crc.ll | 195 ++++++++++++++++++
llvm/test/Transforms/LoopIdiom/crc/not-crc.ll | 113 ++++++++++
2 files changed, 308 insertions(+)
create mode 100644 llvm/test/Transforms/LoopIdiom/crc/crc.ll
create mode 100644 llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
diff --git a/llvm/test/Transforms/LoopIdiom/crc/crc.ll b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
new file mode 100644
index 00000000000000..17c1313a4c7d58
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
@@ -0,0 +1,195 @@
+; RUN: opt -passes=loop-idiom < %s -S -debug -recognize-crc 2>&1 | FileCheck %s
+
+; CRC 8 bit, data 8 bit
+; CHECK: GeneratorPolynomial: 29
+; CHECK: CRC Size: 8
+; CHECK: Reversed: 0
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define dso_local zeroext i8 @crc8_loop(ptr noundef %data, i32 noundef %length) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond.cleanup7, %entry
+ %crc.0 = phi i8 [ 0, %entry ], [ %crc.1.lcssa, %for.cond.cleanup7 ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc20, %for.cond.cleanup7 ]
+ %cmp = icmp ult i32 %i.0, %length
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond
+ %crc.0.lcssa = phi i8 [ %crc.0, %for.cond ]
+ ret i8 %crc.0.lcssa
+
+for.body: ; preds = %for.cond
+ %add.ptr = getelementptr inbounds i8, ptr %data, i32 %i.0
+ %0 = load i8, ptr %add.ptr, align 1
+ %xor29 = xor i8 %0, %crc.0
+ br label %for.body8
+
+for.cond.cleanup7: ; preds = %for.body8
+ %crc.1.lcssa = phi i8 [ %crc.2, %for.body8 ]
+ %inc20 = add i32 %i.0, 1
+ br label %for.cond
+
+for.body8: ; preds = %for.body, %for.body8
+ %i3.032 = phi i32 [ 0, %for.body ], [ %inc, %for.body8 ]
+ %crc.131 = phi i8 [ %xor29, %for.body ], [ %crc.2, %for.body8 ]
+ %shl = shl i8 %crc.131, 1
+ %xor14 = xor i8 %shl, 29
+ %cmp10.not30 = icmp slt i8 %crc.131, 0
+ %crc.2 = select i1 %cmp10.not30, i8 %xor14, i8 %shl
+ %inc = add nuw nsw i32 %i3.032, 1
+ %cmp5 = icmp ult i32 %inc, 8
+ br i1 %cmp5, label %for.body8, label %for.cond.cleanup7
+}
+
+; CRC16, 8 bit data
+; CHECK: Input CRC: i16 %crc
+; CHECK: Output CRC: %crc.addr.2
+; CHECK: GeneratorPolynomial: 32773
+; CHECK: CRC Size: 16
+; CHECK: Reversed: 1
+; CHECK: Data Input: i8 %data
+; CHECK: Data Size: 8
+define i16 @crc16_reversed(i8 %data, i16 %crc) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+ %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+ %data.addr.034 = phi i8 [ %data, %entry ], [ %1, %for.body ]
+ %0 = trunc i16 %crc.addr.035 to i8
+ %and33 = xor i8 %0, %data.addr.034
+ %xor = and i8 %and33, 1
+ %1 = lshr i8 %data.addr.034, 1
+ %cmp10.not = icmp eq i8 %xor, 0
+ %2 = lshr i16 %crc.addr.035, 1
+ %3 = xor i16 %2, -24575
+ %crc.addr.2 = select i1 %cmp10.not, i16 %2, i16 %3
+ %inc = add nuw nsw i8 %i.036, 1
+ %cmp = icmp ult i8 %inc, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+ ret i16 %crc.addr.0.lcssa
+}
+
+; CRC16 xor outside loop
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define dso_local zeroext i16 @crc16_xor_outside(i16 %crc, i8 %data) {
+entry:
+ %conv2 = zext i8 %data to i16
+ %shl = shl nuw i16 %conv2, 8
+ %xor = xor i16 %shl, %crc
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.020 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %crc.addr.019 = phi i16 [ %xor, %entry ], [ %crc.addr.1, %for.body ]
+ %shl7 = shl i16 %crc.addr.019, 1
+ %xor8 = xor i16 %shl7, 4129
+ %tobool.not18 = icmp slt i16 %crc.addr.019, 0
+ %crc.addr.1 = select i1 %tobool.not18, i16 %xor8, i16 %shl7
+ %inc = add nuw nsw i32 %i.020, 1
+ %cmp = icmp ult i32 %inc, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crc.addr.0.lcssa = phi i16 [ %crc.addr.1, %for.body ]
+ ret i16 %crc.addr.0.lcssa
+}
+
+; CRC size 32 xor inside in a byte loop
+; CHECK: GeneratorPolynomial: 270598144
+; CHECK: CRC Size: 32
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define i16 @crc32_reversed(ptr %data_p, i16 %length) {
+entry:
+ %cmp = icmp eq i16 %length, 0
+ br i1 %cmp, label %cleanup, label %do.body.preheader
+
+do.body.preheader: ; preds = %entry
+ br label %do.body
+
+do.body: ; preds = %do.body.preheader, %do.cond
+ %data_p.addr.0 = phi ptr [ %incdec.ptr, %do.cond ], [ %data_p, %do.body.preheader ]
+ %length.addr.0 = phi i16 [ %dec, %do.cond ], [ %length, %do.body.preheader ]
+ %crc.0 = phi i32 [ %crc.1.lcssa, %do.cond ], [ 65535, %do.body.preheader ]
+ %incdec.ptr = getelementptr inbounds i8, ptr %data_p.addr.0, i64 1
+ %0 = load i8, ptr %data_p.addr.0, align 1
+ %conv3 = zext i8 %0 to i32
+ br label %for.body
+
+for.body: ; preds = %do.body, %for.body
+ %crc.135 = phi i32 [ %crc.0, %do.body ], [ %crc.2, %for.body ]
+ %data.034 = phi i32 [ %conv3, %do.body ], [ %shr13, %for.body ]
+ %i.033 = phi i8 [ 0, %do.body ], [ %inc, %for.body ]
+ %and732 = xor i32 %crc.135, %data.034
+ %xor = and i32 %and732, 1
+ %tobool.not = icmp eq i32 %xor, 0
+ %shr = lshr i32 %crc.135, 1
+ %xor10 = xor i32 %shr, 33800
+ %crc.2 = select i1 %tobool.not, i32 %shr, i32 %xor10
+ %inc = add nuw nsw i8 %i.033, 1
+ %shr13 = lshr i32 %data.034, 1
+ %cmp5 = icmp ult i8 %inc, 8
+ br i1 %cmp5, label %for.body, label %do.cond
+
+do.cond: ; preds = %for.body
+ %crc.1.lcssa = phi i32 [ %crc.2, %for.body ]
+ %dec = add i16 %length.addr.0, -1
+ %tobool14.not = icmp eq i16 %dec, 0
+ br i1 %tobool14.not, label %do.end, label %do.body
+
+do.end: ; preds = %do.cond
+ %crc.1.lcssa.lcssa = phi i32 [ %crc.1.lcssa, %do.cond ]
+ %not15 = xor i32 %crc.1.lcssa.lcssa, -1
+ %shl = shl i32 %not15, 8
+ %shr16 = lshr i32 %not15, 8
+ %and17 = and i32 %shr16, 255
+ %or = add nuw nsw i32 %and17, %shl
+ %conv18 = trunc i32 %or to i16
+ br label %cleanup
+
+cleanup: ; preds = %entry, %do.end
+ %retval.0 = phi i16 [ %conv18, %do.end ], [ 0, %entry ]
+ ret i16 %retval.0
+}
+
+; CRC16
+; CHECK: GeneratorPolynomial: 258
+; CHECK: CRC Size: 16
+; CHECK: Reversed: 0
+; CHECK: Data Size: 8
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define signext i16 @crc16(i16 %crcValue, i8 %newByte) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+ %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+ %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+ %and = lshr i16 %crcValue.addr.015, 8
+ %conv2 = zext i8 %newByte.addr.016 to i16
+ %shr14 = xor i16 %conv2, %and
+ %xor = and i16 %shr14, 128
+ %tobool.not = icmp eq i16 %xor, 0
+ %shl = shl i16 %crcValue.addr.015, 1
+ %xor4 = xor i16 %shl, 258
+ %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+ %shl7 = shl i8 %newByte.addr.016, 1
+ %inc = add nuw nsw i8 %i.017, 1
+ %cmp = icmp ult i8 %inc, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+ ret i16 %crcValue.addr.0.lcssa
+}
+
+; CHECK: @crctable.i16.32773.reversed = private constant [256 x i16] [i16 0, i16 -16191, i16 -15999, i16 320
+; CHECK: @crctable.i16.4129 = private constant [256 x i16] [i16 0, i16 4129, i16 8258, i16 12387, i16 16516
+; CHECK: @crctable.i32.270598144.reversed = private constant [256 x i32] [i32 0, i32 4489, i32 8978, i32 12955
+; CHECK: @crctable.i16.258 = private constant [256 x i16] [i16 0, i16 258, i16 516, i16 774, i16 1032
diff --git a/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll b/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
new file mode 100644
index 00000000000000..3144ffa6524351
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
@@ -0,0 +1,113 @@
+; RUN: opt -passes=loop-idiom < %s -S -debug -recognize-crc 2>&1 | FileCheck %s
+
+; crc16 incorrect xor inside loop
+; CHECK: loop-idiom CRCRegonize: Cannot verify check bit!
+; CHECK: crc[0]^data[0]
+; CHECK: crc[1]^1
+define i16 @crc16_incorrect_xor(i8 %data, i16 %crc) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+ %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+ %data.addr.034 = phi i8 [ %data, %entry ], [ %1, %for.body ]
+ %0 = trunc i16 %crc.addr.035 to i8
+ %and33 = xor i8 %0, 25
+ %xor = and i8 %and33, 1
+ %1 = lshr i8 %data.addr.034, 1
+ %cmp10.not = icmp eq i8 %xor, 0
+ %2 = lshr i16 %crc.addr.035, 1
+ %3 = xor i16 %2, -24575
+ %crc.addr.2 = select i1 %cmp10.not, i16 %2, i16 %3
+ %inc = add nuw nsw i8 %i.036, 1
+ %cmp = icmp ult i8 %inc, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+ ret i16 %crc.addr.0.lcssa
+}
+
+; Two byte at a time crc not supported
+; CHECK-NOT: loop-idiom CRCRegonize: This looks like crc!
+define i16 @crc16_reversed_data16(i16 %data, i16 %crc) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+ %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+ %data.addr.034 = phi i16 [ %data, %entry ], [ %0, %for.body ]
+ %and33 = xor i16 %crc.addr.035, %data.addr.034
+ %xor = and i16 %and33, 1
+ %0 = lshr i16 %data.addr.034, 1
+ %cmp10.not = icmp eq i16 %xor, 0
+ %1 = lshr i16 %crc.addr.035, 1
+ %2 = xor i16 %1, -24575
+ %crc.addr.2 = select i1 %cmp10.not, i16 %1, i16 %2
+ %inc = add nuw nsw i8 %i.036, 1
+ %cmp = icmp ult i8 %inc, 16
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+ ret i16 %crc.addr.0.lcssa
+}
+
+
+; Two shifts per iteration. Check that the ValueBits are correctly mismatched
+; CHECK-NOT: loop-idiom CRCRegonize: This looks like crc!
+define signext i16 @crc16_doubleshift(i16 %crcValue, i8 %newByte) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+ %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+ %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+ %and = lshr i16 %crcValue.addr.015, 8
+ %conv2 = zext i8 %newByte.addr.016 to i16
+ %shr14 = xor i16 %conv2, %and
+ %xor = and i16 %shr14, 128
+ %tobool.not = icmp eq i16 %xor, 0
+ %shlone = shl i16 %crcValue.addr.015, 1
+ %shl = lshr i16 %shlone, 1
+ %xor4 = xor i16 %shl, 258
+ %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+ %shl7 = shl i8 %newByte.addr.016, 1
+ %inc = add nuw nsw i8 %i.017, 1
+ %cmp = icmp ult i8 %inc, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+ ret i16 %crcValue.addr.0.lcssa
+}
+
+; CHECK: loop-idiom CRCRegonize: ICmp RHS is not checking [M/L]SB
+define signext i16 @crc16_not_check_sb(i16 %crcValue, i8 %newByte) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+ %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+ %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+ %and = lshr i16 %crcValue.addr.015, 8
+ %conv2 = zext i8 %newByte.addr.016 to i16
+ %shr14 = xor i16 %conv2, %and
+ %xor = and i16 %shr14, 128
+ %tobool.not = icmp eq i16 %xor, 2
+ %shl = shl i16 %crcValue.addr.015, 1
+ %xor4 = xor i16 %shl, 258
+ %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+ %shl7 = shl i8 %newByte.addr.016, 1
+ %inc = add nuw nsw i8 %i.017, 1
+ %cmp = icmp ult i8 %inc, 8
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+ ret i16 %crcValue.addr.0.lcssa
+}
>From b080b7e8d2d1a859bb9a24336b4424e8e1117aa8 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Thu, 1 Feb 2024 11:51:20 +0000
Subject: [PATCH 08/10] Clarify some comments
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f20947daaed8d5..a2a302b645e0fc 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3413,9 +3413,12 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
}
// Symbolically execute one iteration of the loop to populate a map of
- // Value's to their ValueBits, aka a representation of their bits in terms of
- // 1's, 0's and references to other values' bits. If these match pre-computed
- // crc values, then we can say it's doing crc.
+ // Value's to their ValueBits, i.e. a representation of their bits in terms of
+ // 1's, 0's and references to other values' bits. This tracks how the bits
+ // move through an iteration of the loop. If the loop output's ValueBits
+ // match pre-computed values, then we can say it's doing crc. If there are
+ // any unexpected loop variant operations happening, e.g. additional select
+ // logic or shifts, then this will be captured in the ValueBits.
std::map<Value *, ValueBits *> ValueMap;
if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
@@ -3461,10 +3464,12 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
ValueBits *ICmpOp0Bits = Result->second;
// Now match the following cases
- // (LSB): icmp [ne/eq] %mcrc, [1/0], where mcrc has LSB masked out
- // (MSB): icmp [ne/eq] %mcrc, [1 << BitSize], where mcrc has MSB masked out
- // (MSB): icmp [sgt/sge] %crc, [1/0]
- // (MSB): icmp [slt/sle] %crc, [0/-1]
+ // (LSB): (crc & 1)
+ // (MSB): (crc & (1 << n))
+ // (MSB): crc > 0
+ // (MSB): crc >= 1
+ // (MSB): crc < 0
+ // (MSB): crc <= -1
// And decide whether the check is checking for existence of 1 or 0
bool checkZero = false;
ValueBits::ValueBit *CheckBit = nullptr;
>From c788082bc35dd3fe283b10deb4662ffd35a0303b Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 23 Feb 2024 16:41:21 +0000
Subject: [PATCH 09/10] Review changes that aren't memory
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 66 ++++++++++---------
llvm/test/Transforms/LoopIdiom/crc/crc.ll | 8 +--
2 files changed, 39 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a2a302b645e0fc..79f13ac257fb55 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -137,8 +137,11 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
"with -Os/-Oz"),
cl::init(true), cl::Hidden);
-static cl::opt<bool> CRCRecognize("recognize-crc", cl::desc("CRC RECOGNIZE"),
- cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ CRCRecognize("recognize-crc",
+ cl::desc("Recognize loop-based CRC implementations and "
+ "replaces them with a lookup table."),
+ cl::init(false), cl::Hidden);
namespace {
@@ -3193,7 +3196,7 @@ static bool symbolicallyExecute(BasicBlock *BB,
auto getConstantOperand = [](Instruction *I, uint8_t Operand) {
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
if (!CI) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Do not know how to"
<< " handle this operation with non-constant operand "
<< Operand << ":\n"
<< *I << "\n");
@@ -3227,7 +3230,7 @@ static bool symbolicallyExecute(BasicBlock *BB,
if (Incoming != BB) {
if (IncomingBlock) {
LLVM_DEBUG(dbgs()
- << DEBUG_TYPE " CRCRegonize: Do not know how to"
+ << DEBUG_TYPE " CRCRecognize: Do not know how to"
<< " handle loop with multiple entries" << I << "\n");
return false;
}
@@ -3279,7 +3282,7 @@ static bool symbolicallyExecute(BasicBlock *BB,
SelectInst *Select = cast<SelectInst>(&I);
ICmpInst *Cond = dyn_cast<ICmpInst>(Select->getCondition());
if (!Cond) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Do not know how to"
<< " handle SelectInst with non-icmp condition: " << I
<< "\n");
return false;
@@ -3332,7 +3335,7 @@ void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
// than we need to account for overflow) and copying the 64bit values across
// aligned correctly
uint64_t CRCNumBytes = CRCSize / 8;
- char *CRCTableData = (char *)malloc(CRCNumBytes * 260);
+ char *CRCTableData = new char[CRCNumBytes * 260];
for (int I = 0; I < 256; I++) {
*((uint64_t *)(CRCTableData + I * CRCNumBytes)) = CRCTable[I];
}
@@ -3349,7 +3352,7 @@ void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
TableType, true, GlobalVariable::LinkageTypes::PrivateLinkage,
ConstantArr, TableNameSS.str());
ExitBB->getModule()->insertGlobalVariable(CRCTableGlobal);
- free(CRCTableData);
+ delete CRCTableData;
// Construct the IR to load from this table
Value *CRCOffset = CRC.CRCInput;
@@ -3369,7 +3372,8 @@ void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
CRCOffset = Builder.CreateXor(CRCOffset, Data);
}
- CRCOffset = Builder.CreateZExt(CRCOffset, Builder.getInt32Ty());
+ CRCOffset =
+ Builder.CreateZExt(CRCOffset, DL->getIndexType(ExitBB->getContext(), 0));
Value *Gep = Builder.CreateInBoundsGEP(CRCType, CRCTableGlobal, {CRCOffset});
Value *CRCRes = Builder.CreateLoad(CRCType, Gep);
if (CRCSize > 8) {
@@ -3397,7 +3401,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
CRCInfo CRC = *MaybeCRC;
uint64_t CRCSize = CRC.CRCInput->getType()->getScalarSizeInBits();
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Found potential CRCLoop "
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Found potential CRCLoop "
<< *CurLoop << "\n"
<< "Input CRC: " << *CRC.CRCInput << "\n"
<< "Output CRC: " << *CRC.CRCOutput << "\n"
@@ -3420,26 +3424,26 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// any unexpected loop variant operations happening, e.g. additional select
// logic or shifts, then this will be captured in the ValueBits.
std::map<Value *, ValueBits *> ValueMap;
-
+
if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
return false;
auto Result = ValueMap.find(CRC.CRCOutput);
if (Result == ValueMap.end()) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Did not find CRC output"
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Did not find CRC output"
<< " after symbolic execution\n");
return false;
}
ValueBits *CRCOutBits = Result->second;
LLVM_DEBUG(dbgs() << DEBUG_TYPE
- << " CRCRegonize: ValueBits for output crc value:\n"
+ << " CRCRecognize: ValueBits for output crc value:\n"
<< *CRCOutBits);
// Check this value is predicated
if (!CRCOutBits->isPredicated()) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
- << " CRCRegonize: Output CRC ValueBits is not"
+ << " CRCRecognize: Output CRC ValueBits is not"
<< " predicated.\n");
return false;
}
@@ -3449,7 +3453,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// whether this is bit reversed CRC
ICmpInst *ICmp = CRCOutBitsPred->getPredicate();
CmpInst::Predicate Pred = ICmp->getPredicate();
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize checking to see if " << *ICmp
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize checking to see if " << *ICmp
<< " is checking the "
<< (CRC.BitReversed ? "LSB\n" : "MSB\n"));
@@ -3458,7 +3462,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
Result = ValueMap.find(ICmp->getOperand(0));
if (!RHS || (Result == ValueMap.end())) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
- << " CRCRegonize: Cannot determine ICmp operands\n");
+ << " CRCRecognize: Cannot determine ICmp operands\n");
return false;
}
ValueBits *ICmpOp0Bits = Result->second;
@@ -3471,7 +3475,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// (MSB): crc < 0
// (MSB): crc <= -1
// And decide whether the check is checking for existence of 1 or 0
- bool checkZero = false;
+ bool CheckZero = false;
ValueBits::ValueBit *CheckBit = nullptr;
switch (Pred) {
case CmpInst::ICMP_NE:
@@ -3483,7 +3487,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
if (!(CRC.BitReversed && RHSNum == 1) &&
!(!CRC.BitReversed && RHSNum == MSBNum) && RHSNum != 0) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
- << " CRCRegonize: ICmp RHS is not checking [M/L]SB\n");
+ << " CRCRecognize: ICmp RHS is not checking [M/L]SB\n");
return false;
}
// Now to check if we already know all the other bits of the RHS are zero.
@@ -3504,17 +3508,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
LLVM_DEBUG(
dbgs() << DEBUG_TYPE
- << " CRCRegonize: Cannot determine ICmp checks [M/L]SB\n");
+ << " CRCRecognize: Cannot determine ICmp checks [M/L]SB\n");
return false;
}
- checkZero = RHSNum == 0;
+ CheckZero = RHSNum == 0;
break;
}
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
- checkZero = true;
+ CheckZero = true;
[[fallthrough]];
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE: {
@@ -3526,7 +3530,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
((Pred == CmpInst::ICMP_ULT) && RHSNum != (1 << (CRC.Width - 1))) ||
((Pred == CmpInst::ICMP_ULE) && RHSNum != (1 << (CRC.Width - 1)) - 1)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
- << " CRCRegonize: ICmp RHS is not checking MSB\n");
+ << " CRCRecognize: ICmp RHS is not checking MSB\n");
return false;
}
CheckBit = ICmpOp0Bits->getBit(CRCSize - 1);
@@ -3544,17 +3548,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
if (CRC.DataInput) {
uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
- ValueBits::ValueBit *DataInputRefBit =
- ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+ ValueBits::ValueBit *DataInputRefBit =
+ ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
RefCheckBit =
- ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+ ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
} else {
RefCheckBit = CRCInputRefBit;
}
if (!RefCheckBit->equals(CheckBit)) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
- << " CRCRegonize: Cannot verify check bit!\n"
+ << " CRCRecognize: Cannot verify check bit!\n"
<< *RefCheckBit << "\n"
<< *CheckBit << "\n");
return false;
@@ -3562,7 +3566,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
- if (checkZero)
+ if (CheckZero)
std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
// Now construct ValueBits that would be the result of crc for one iteration.
@@ -3586,17 +3590,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
if (!IfZero->equals(CRCOutBitsIfZero)) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
<< *IfZero << *CRCOutBitsPred->getIfFalse());
return false;
}
if (!IfOne->equals(CRCOutBitsIfOne)) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
<< *IfOne << *CRCOutBitsPred->getIfTrue());
return false;
}
- LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: This looks like crc!\n");
writeTableBasedCRCOneByte(CRC);
@@ -3620,7 +3624,7 @@ LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
// unlikely to be CRC. To reduce complexity, only consider single-block loops
// for CRC recognition
if (CurLoop->getBlocks().size() > 1) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Loops with more than one"
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Loops with more than one"
<< "block are unsupported\n");
return std::nullopt;
}
@@ -3737,7 +3741,7 @@ LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
}
if (!(CRCShift && GeneratorPolynomial && CRCInput)) {
- LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Does not look like CRC");
+ LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Does not look like CRC");
return std::nullopt;
}
diff --git a/llvm/test/Transforms/LoopIdiom/crc/crc.ll b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
index 17c1313a4c7d58..5a1c6afcb57412 100644
--- a/llvm/test/Transforms/LoopIdiom/crc/crc.ll
+++ b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
@@ -4,7 +4,7 @@
; CHECK: GeneratorPolynomial: 29
; CHECK: CRC Size: 8
; CHECK: Reversed: 0
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
define dso_local zeroext i8 @crc8_loop(ptr noundef %data, i32 noundef %length) {
entry:
br label %for.cond
@@ -76,7 +76,7 @@ for.end: ; preds = %for.body
}
; CRC16 xor outside loop
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
define dso_local zeroext i16 @crc16_xor_outside(i16 %crc, i8 %data) {
entry:
%conv2 = zext i8 %data to i16
@@ -103,7 +103,7 @@ for.end: ; preds = %for.body
; CRC size 32 xor inside in a byte loop
; CHECK: GeneratorPolynomial: 270598144
; CHECK: CRC Size: 32
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
define i16 @crc32_reversed(ptr %data_p, i16 %length) {
entry:
%cmp = icmp eq i16 %length, 0
@@ -162,7 +162,7 @@ cleanup: ; preds = %entry, %do.end
; CHECK: CRC Size: 16
; CHECK: Reversed: 0
; CHECK: Data Size: 8
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
define signext i16 @crc16(i16 %crcValue, i8 %newByte) {
entry:
br label %for.body
>From 9ebb437002f79e51f1ece7e17c06bd821f6ae7b7 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 23 Feb 2024 17:30:01 +0000
Subject: [PATCH 10/10] Fix memory issues
- Use shared pointers for factory methods
I think the use of shared pointers in `symbolicallyExecute` is the
correct decision due to the filling of a map. I tried for a while to
make this a map of objects, but due to [object
slicing](https://en.wikipedia.org/wiki/Object_slicing) it would not be
possible to store a PredicatedValueBits in this map. (This would require
PredicatedValueBits to be redesigned into the ValueBits class, but then
the internal methods get messy. Virtual functions were a clean answer to
this.) Additionally, using `unique_ptr` proves difficult due to having to
store it in a map, since unique pointers cannot be copied. As
`symbolicallyExecute` is the primary user of the factory methods, it
made sense for the factory methods to return a shared pointer, rather
than returning a raw pointer or object and then wrapping it in a shared
pointer within `symbolicallyExecute`.
- Have XOR bits own their LHS/RHS
I don't think having them as member objects instead of pointers makes
much sense: most ValueBit objects do not need them, yet they would have
to be initialized for every ValueBit instantiation.
- Use object values instead of pointers for bit representation
IMO the cleanest way. They're very lightweight anyway.
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 320 ++++++++++--------
1 file changed, 180 insertions(+), 140 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 79f13ac257fb55..b4bc8d71db425c 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -91,6 +91,7 @@
#include <cassert>
#include <cstdint>
#include <map>
+#include <memory>
#include <sstream>
#include <utility>
#include <vector>
@@ -2928,37 +2929,71 @@ class ValueBits {
private:
BitType _Type;
std::pair<Value *, uint64_t> _BitRef;
- ValueBit *_LHS;
- ValueBit *_RHS;
+ // Pointers to LHS and RHS of an XOR operation. These pointers are owned by
+ // the ValueBit object.
+ ValueBit *_LHS = nullptr;
+ ValueBit *_RHS = nullptr;
+ public:
ValueBit(BitType Type) : _Type(Type) {}
ValueBit(BitType Type, std::pair<Value *, uint64_t> BitRef)
: _Type(Type), _BitRef(BitRef) {}
- ValueBit(BitType Type, ValueBit *LHS, ValueBit *RHS)
- : _Type(Type), _LHS(LHS), _RHS(RHS) {}
+ ValueBit(BitType Type, ValueBit &LHS, ValueBit &RHS)
+ : _Type(Type), _LHS(new ValueBit(LHS)), _RHS(new ValueBit(RHS)) {
+ assert(_Type == BitType::XOR);
+ }
+ ValueBit() = delete;
+ // Define Copy and Assignment constructor to create copies of the LHS and
+ // RHS if the bit type is XOR. This is done to ensure the pointers will be
+ // owned by the ValueBit object and avoid double free in the destructor.
+ ValueBit(const ValueBit &VB) {
+ _Type = VB._Type;
+ if (_Type == BitType::REF)
+ _BitRef = VB._BitRef;
+ else if (_Type == BitType::XOR) {
+ _LHS = new ValueBit(*VB._LHS);
+ _RHS = new ValueBit(*VB._RHS);
+ }
+ }
+ ValueBit& operator=(const ValueBit &VB) {
+ _Type = VB._Type;
+ if (_Type == BitType::REF)
+ _BitRef = VB._BitRef;
+ else if (_Type == BitType::XOR) {
+ _LHS = new ValueBit(*VB._LHS);
+ _RHS = new ValueBit(*VB._RHS);
+ }
+ return *this;
+ }
+ ~ValueBit() {
+ if (_LHS)
+ delete _LHS;
+ if (_RHS)
+ delete _RHS;
+ }
public:
- static ValueBit *CreateOneBit() { return new ValueBit(BitType::ONE); }
- static ValueBit *CreateZeroBit() { return new ValueBit(BitType::ZERO); }
- static ValueBit *CreateRefBit(Value *Ref, uint64_t Offset) {
- return new ValueBit(BitType::REF, std::make_pair(Ref, Offset));
+ static ValueBit CreateOneBit() { return ValueBit(BitType::ONE); }
+ static ValueBit CreateZeroBit() { return ValueBit(BitType::ZERO); }
+ static ValueBit CreateRefBit(Value *Ref, uint64_t Offset) {
+ return ValueBit(BitType::REF, std::make_pair(Ref, Offset));
}
- static ValueBit *CreateXORBit(ValueBit *LHS, ValueBit *RHS) {
- return new ValueBit(BitType::XOR, LHS, RHS);
+ static ValueBit CreateXORBit(ValueBit &LHS, ValueBit &RHS) {
+ return ValueBit(BitType::XOR, LHS, RHS);
}
inline BitType getType() { return _Type; }
- bool equals(ValueBit *RHS) {
- if (_Type != RHS->getType())
+ bool equals(ValueBit RHS) {
+ if (_Type != RHS.getType())
return false;
switch (_Type) {
case BitType::ONE:
case BitType::ZERO:
return true;
case BitType::REF:
- return _BitRef == RHS->_BitRef;
+ return _BitRef == RHS._BitRef;
case BitType::XOR:
- return (_LHS->equals(RHS->_LHS) && _RHS->equals(RHS->_RHS)) ||
- (_LHS->equals(RHS->_RHS) && _RHS->equals(RHS->_LHS));
+ return (_LHS->equals(*RHS._LHS) && _RHS->equals(*RHS._RHS)) ||
+ (_LHS->equals(*RHS._RHS) && _RHS->equals(*RHS._LHS));
}
return false;
}
@@ -2983,10 +3018,11 @@ class ValueBits {
}
}
};
+ using PValueBits = std::shared_ptr<ValueBits>;
private:
uint64_t Size;
- std::vector<ValueBit *> Bits;
+ std::vector<ValueBit> Bits;
virtual void _Shl(uint64_t N) {
for (; N > 0; N--) {
@@ -3000,28 +3036,28 @@ class ValueBits {
Bits.erase(Bits.begin());
}
}
- virtual void _Xor(ValueBits *RHS) {
- assert(Size == RHS->getSize());
+ virtual void _Xor(const ValueBits &RHS) {
+ assert(Size == RHS.getSize());
for (unsigned I = 0; I < Size; I++) {
auto It = Bits.begin() + I;
- ValueBit *RHSBit = RHS->getBit(I);
- if (RHSBit->getType() == ValueBit::BitType::ONE) {
+ ValueBit RHSBit = RHS.getBit(I);
+ if (RHSBit.getType() == ValueBit::BitType::ONE) {
+ ValueBit ItVB = *It;
Bits.erase(It);
- if ((*It)->getType() == ValueBit::BitType::ZERO) {
+ if (ItVB.getType() == ValueBit::BitType::ZERO) {
Bits.insert(It, ValueBit::CreateOneBit());
- } else if ((*It)->getType() == ValueBit::BitType::ONE) {
+ } else if (ItVB.getType() == ValueBit::BitType::ONE) {
Bits.insert(It, ValueBit::CreateZeroBit());
} else {
- ValueBit *One = ValueBit::CreateOneBit();
- Bits.insert(It, ValueBit::CreateXORBit(*It, One));
+ ValueBit One = ValueBit::CreateOneBit();
+ Bits.insert(It, ValueBit::CreateXORBit(ItVB, One));
}
- } else if (RHSBit->getType() != ValueBit::BitType::ZERO) {
- if ((*It)->getType() == ValueBit::BitType::ZERO) {
+ } else if (RHSBit.getType() != ValueBit::BitType::ZERO) {
+ if ((*It).getType() == ValueBit::BitType::ZERO) {
Bits.erase(It);
- ValueBit *BitRef = new ValueBit(*RHSBit);
- Bits.insert(It, BitRef);
+ Bits.insert(It, RHSBit);
} else {
- ValueBit *ItVB = *It;
+ ValueBit ItVB = *It;
Bits.erase(It);
Bits.insert(It, ValueBit::CreateXORBit(ItVB, RHSBit));
}
@@ -3067,40 +3103,40 @@ class ValueBits {
InitialVal >>= 1;
}
}
- uint64_t getSize() { return Size; }
- ValueBit *getBit(unsigned i) { return Bits[i]; }
+ uint64_t getSize() const { return Size; }
+ ValueBit getBit(unsigned i) const { return Bits[i]; }
- virtual ValueBits *copyBits() { return new ValueBits(*this); }
+ virtual ValueBits copyBits() { return ValueBits(*this); }
- static ValueBits *Shl(ValueBits *LHS, uint64_t N) {
- ValueBits *Shifted = LHS->copyBits();
- Shifted->_Shl(N);
- return Shifted;
+ static PValueBits Shl(const ValueBits &LHS, uint64_t N) {
+ PValueBits VB = std::make_shared<ValueBits>(LHS);
+ VB->_Shl(N);
+ return VB;
}
- static ValueBits *LShr(ValueBits *LHS, uint64_t N) {
- ValueBits *Shifted = LHS->copyBits();
- Shifted->_LShr(N);
- return Shifted;
+ static PValueBits LShr(const ValueBits &LHS, uint64_t N) {
+ PValueBits VB = std::make_shared<ValueBits>(LHS);
+ VB->_LShr(N);
+ return VB;
}
- static ValueBits *Xor(ValueBits *LHS, ValueBits *RHS) {
- ValueBits *Xord = LHS->copyBits();
- Xord->_Xor(RHS);
- return Xord;
+ static PValueBits Xor(const ValueBits &LHS, const ValueBits &RHS) {
+ PValueBits VB = std::make_shared<ValueBits>(LHS);
+ VB->_Xor(RHS);
+ return VB;
}
- static ValueBits *ZExt(ValueBits *LHS, uint64_t ToSize) {
- ValueBits *Zexted = LHS->copyBits();
- Zexted->_ZExt(ToSize);
- return Zexted;
+ static PValueBits ZExt(const ValueBits &LHS, uint64_t ToSize) {
+ PValueBits VB = std::make_shared<ValueBits>(LHS);
+ VB->_ZExt(ToSize);
+ return VB;
}
- static ValueBits *Trunc(ValueBits *LHS, uint64_t N) {
- ValueBits *Trunced = LHS->copyBits();
- Trunced->_Trunc(N);
- return Trunced;
+ static PValueBits Trunc(const ValueBits &LHS, uint64_t N) {
+ PValueBits VB = std::make_shared<ValueBits>(LHS);
+ VB->_Trunc(N);
+ return VB;
}
- static ValueBits *And(ValueBits *LHS, uint64_t RHS) {
- ValueBits *Anded = LHS->copyBits();
- Anded->_And(RHS);
- return Anded;
+ static PValueBits And(const ValueBits &LHS, uint64_t RHS) {
+ PValueBits VB = std::make_shared<ValueBits>(LHS);
+ VB->_And(RHS);
+ return VB;
}
virtual bool isPredicated() { return false; }
@@ -3110,7 +3146,7 @@ class ValueBits {
return false;
for (unsigned I = 0; I < Size; I++)
- if (!getBit(I)->equals(RHS->getBit(I)))
+ if (!getBit(I).equals(RHS->getBit(I)))
return false;
return true;
@@ -3119,10 +3155,10 @@ class ValueBits {
virtual void print(raw_ostream &OS) {
assert(Size != 0);
OS << "[";
- Bits[Size - 1]->print(OS);
+ Bits[Size - 1].print(OS);
for (int i = Size - 2; i >= 0; i--) {
OS << " | ";
- Bits[i]->print(OS);
+ Bits[i].print(OS);
}
OS << "]\n";
}
@@ -3142,43 +3178,43 @@ class PredicatedValueBits : public ValueBits {
// would depend on an icmp.
private:
ICmpInst *_Predicate;
- ValueBits *_IfTrue;
- ValueBits *_IfFalse;
+ PValueBits _IfTrue;
+ PValueBits _IfFalse;
void _Shl(uint64_t N) override {
- _IfTrue = ValueBits::Shl(_IfTrue, N);
- _IfFalse = ValueBits::Shl(_IfFalse, N);
+ _IfTrue = ValueBits::Shl(*_IfTrue, N);
+ _IfFalse = ValueBits::Shl(*_IfFalse, N);
}
void _LShr(uint64_t N) override {
- _IfTrue = ValueBits::LShr(_IfTrue, N);
- _IfFalse = ValueBits::LShr(_IfFalse, N);
+ _IfTrue = ValueBits::LShr(*_IfTrue, N);
+ _IfFalse = ValueBits::LShr(*_IfFalse, N);
}
void _ZExt(uint64_t N) override {
- _IfTrue = ValueBits::ZExt(_IfTrue, N);
- _IfFalse = ValueBits::ZExt(_IfFalse, N);
+ _IfTrue = ValueBits::ZExt(*_IfTrue, N);
+ _IfFalse = ValueBits::ZExt(*_IfFalse, N);
}
void _And(uint64_t N) override {
- _IfTrue = ValueBits::And(_IfTrue, N);
- _IfFalse = ValueBits::And(_IfFalse, N);
+ _IfTrue = ValueBits::And(*_IfTrue, N);
+ _IfFalse = ValueBits::And(*_IfFalse, N);
}
- void _Xor(ValueBits *RHS) override {
- _IfTrue = ValueBits::Xor(_IfTrue, RHS);
- _IfFalse = ValueBits::Xor(_IfFalse, RHS);
+ void _Xor(const ValueBits &RHS) override {
+ _IfTrue = ValueBits::Xor(*_IfTrue, RHS);
+ _IfFalse = ValueBits::Xor(*_IfFalse, RHS);
}
void _Trunc(uint64_t N) override {
- _IfTrue = ValueBits::Trunc(_IfTrue, N);
- _IfFalse = ValueBits::Trunc(_IfFalse, N);
+ _IfTrue = ValueBits::Trunc(*_IfTrue, N);
+ _IfFalse = ValueBits::Trunc(*_IfFalse, N);
}
public:
- PredicatedValueBits(ICmpInst *Predicate, ValueBits *IfTrue,
- ValueBits *IfFalse)
+ PredicatedValueBits(ICmpInst *Predicate, PValueBits IfTrue,
+ PValueBits IfFalse)
: _Predicate(Predicate), _IfTrue(IfTrue), _IfFalse(IfFalse) {}
- ValueBits *copyBits() override { return new PredicatedValueBits(*this); }
+ ValueBits copyBits() override { return PredicatedValueBits(*this); }
bool isPredicated() override { return true; }
- ValueBits *getIfTrue() { return _IfTrue; }
- ValueBits *getIfFalse() { return _IfFalse; }
+ PValueBits getIfTrue() { return _IfTrue; }
+ PValueBits getIfFalse() { return _IfFalse; }
ICmpInst *getPredicate() { return _Predicate; }
virtual void print(raw_ostream &OS) override {
@@ -3190,8 +3226,9 @@ class PredicatedValueBits : public ValueBits {
// Execute the instructions in a basic block whilst mapping out Values to
// ValueBits
-static bool symbolicallyExecute(BasicBlock *BB,
- std::map<Value *, ValueBits *> &ValueMap) {
+static bool
+symbolicallyExecute(BasicBlock *BB,
+ std::map<Value *, std::shared_ptr<ValueBits>> &ValueMap) {
auto getConstantOperand = [](Instruction *I, uint8_t Operand) {
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
@@ -3206,18 +3243,15 @@ static bool symbolicallyExecute(BasicBlock *BB,
auto getOrCreateValueBits = [&ValueMap](Value *Val) {
auto Result = ValueMap.find(Val);
- ValueBits *LHSBits = nullptr;
- if (Result == ValueMap.end()) {
- ConstantInt *CI = dyn_cast<ConstantInt>(Val);
- if (CI) {
- LHSBits = new ValueBits(CI->getSExtValue(),
- Val->getType()->getScalarSizeInBits());
- } else {
- LHSBits = new ValueBits(Val, Val->getType()->getScalarSizeInBits());
- }
- } else
- LHSBits = Result->second;
- return LHSBits;
+ if (Result != ValueMap.end())
+ return Result->second;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Val);
+ if (CI) {
+ return std::make_shared<ValueBits>(CI->getSExtValue(),
+ Val->getType()->getScalarSizeInBits());
+ }
+ return std::make_shared<ValueBits>(Val,
+ Val->getType()->getScalarSizeInBits());
};
for (Instruction &I : *BB) {
@@ -3238,45 +3272,42 @@ static bool symbolicallyExecute(BasicBlock *BB,
}
}
assert(IncomingBlock);
- ValueMap[&I] =
- getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock));
+ ValueMap.insert({&I,
+ getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock))});
} break;
case Instruction::Shl: {
ConstantInt *CI = getConstantOperand(&I, 1);
if (!CI)
return false;
- Value *LHSVal = I.getOperand(0);
- ValueBits *LHSBits = getOrCreateValueBits(LHSVal);
- ValueMap[&I] = ValueBits::Shl(LHSBits, CI->getSExtValue());
+ auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+ ValueMap.insert({&I, ValueBits::Shl(*LHSBits, CI->getSExtValue())});
} break;
case Instruction::LShr: {
ConstantInt *CI = getConstantOperand(&I, 1);
if (!CI)
return false;
- Value *LHSVal = I.getOperand(0);
- ValueBits *LHSBits = getOrCreateValueBits(LHSVal);
- ValueMap[&I] = ValueBits::LShr(LHSBits, CI->getSExtValue());
+ auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+ ValueMap.insert({&I, ValueBits::LShr(*LHSBits, CI->getSExtValue())});
} break;
case Instruction::And: {
ConstantInt *CI = getConstantOperand(&I, 1);
if (!CI)
return false;
- Value *LHSVal = I.getOperand(0);
- ValueBits *LHSBits = getOrCreateValueBits(LHSVal);
- ValueMap[&I] = ValueBits::And(LHSBits, CI->getSExtValue());
+ auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+ ValueMap.insert({&I, ValueBits::And(*LHSBits, CI->getSExtValue())});
} break;
case Instruction::Xor: {
- ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
- ValueBits *RHSBits = getOrCreateValueBits(I.getOperand(1));
- ValueMap[&I] = ValueBits::Xor(LHSBits, RHSBits);
+ auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+ auto RHSBits = getOrCreateValueBits(I.getOperand(1));
+ ValueMap.insert({&I, ValueBits::Xor(*LHSBits, *RHSBits)});
} break;
case Instruction::ZExt: {
- ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
- ValueMap[&I] = ValueBits::ZExt(LHSBits, BitSize);
+ auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+ ValueMap.insert({&I, ValueBits::ZExt(*LHSBits, BitSize)});
} break;
case Instruction::Trunc: {
- ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
- ValueMap[&I] = ValueBits::Trunc(LHSBits, BitSize);
+ auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+ ValueMap.insert({&I, ValueBits::Trunc(*LHSBits, BitSize)});
} break;
case Instruction::Select: {
SelectInst *Select = cast<SelectInst>(&I);
@@ -3287,9 +3318,10 @@ static bool symbolicallyExecute(BasicBlock *BB,
<< "\n");
return false;
}
- ValueBits *IfTrue = getOrCreateValueBits(Select->getTrueValue());
- ValueBits *IfFalse = getOrCreateValueBits(Select->getFalseValue());
- ValueMap[&I] = new PredicatedValueBits(Cond, IfTrue, IfFalse);
+ auto IfTrue = getOrCreateValueBits(Select->getTrueValue());
+ auto IfFalse = getOrCreateValueBits(Select->getFalseValue());
+ ValueMap.insert({&I,
+ std::make_shared<PredicatedValueBits>(Cond, IfTrue, IfFalse)});
} break;
default:
// If this instruction is not recognized, then just continue. This is
@@ -3423,7 +3455,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// match pre-computed values, then we can say it's doing crc. If there are
// any unexpected loop variant operations happening, e.g. additional select
// logic or shifts, then this will be captured in the ValueBits.
- std::map<Value *, ValueBits *> ValueMap;
+ std::map<Value *, std::shared_ptr<ValueBits>> ValueMap;
if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
return false;
@@ -3435,7 +3467,8 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
return false;
}
- ValueBits *CRCOutBits = Result->second;
+ using PValueBits = std::shared_ptr<ValueBits>;
+ PValueBits CRCOutBits = Result->second;
LLVM_DEBUG(dbgs() << DEBUG_TYPE
<< " CRCRecognize: ValueBits for output crc value:\n"
<< *CRCOutBits);
@@ -3447,7 +3480,8 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
<< " predicated.\n");
return false;
}
- PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
+ std::shared_ptr<PredicatedValueBits> CRCOutBitsPred =
+ std::static_pointer_cast<PredicatedValueBits>(CRCOutBits);
// Need to check if the predicate is checking the MSB/LSB depending on
// whether this is bit reversed CRC
@@ -3465,7 +3499,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
<< " CRCRecognize: Cannot determine ICmp operands\n");
return false;
}
- ValueBits *ICmpOp0Bits = Result->second;
+ PValueBits ICmpOp0Bits = Result->second;
// Now match the following cases
// (LSB): (crc & 1)
@@ -3476,7 +3510,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
// (MSB): crc <= -1
// And decide whether the check is checking for existence of 1 or 0
bool CheckZero = false;
- ValueBits::ValueBit *CheckBit = nullptr;
+ std::optional<ValueBits::ValueBit> CheckBit;
switch (Pred) {
case CmpInst::ICMP_NE:
case CmpInst::ICMP_EQ: {
@@ -3492,17 +3526,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
}
// Now to check if we already know all the other bits of the RHS are zero.
ValueBits AllZeroValueBits((uint64_t)0, ICmpOp0Bits->getSize());
- ValueBits *CRCOutBitsMasked = nullptr;
+ PValueBits CRCOutBitsMasked;
if (CRC.BitReversed) {
// Masking out the LSB is equivalent to shifting right one if we're just
// comparing all the other bits are zero.
- CRCOutBitsMasked = ValueBits::LShr(ICmpOp0Bits, 1);
+ CRCOutBitsMasked = ValueBits::LShr(*ICmpOp0Bits, 1);
CheckBit = ICmpOp0Bits->getBit(0);
} else {
// The CRC type might be larger than the data, so we can't shift left
// one. Mask instead.
uint64_t MSBMask = ~(1 << (CRC.Width - 1));
- CRCOutBitsMasked = ValueBits::And(ICmpOp0Bits, MSBMask);
+ CRCOutBitsMasked = ValueBits::And(*ICmpOp0Bits, MSBMask);
CheckBit = ICmpOp0Bits->getBit(CRC.Width - 1);
}
if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
@@ -3540,23 +3574,26 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
return false;
}
+ if (!CheckBit.has_value())
+ return false;
+
// If there exists a Data input, ensure the check bit is crc^data.
- ValueBits::ValueBit *RefCheckBit = nullptr;
+ std::optional<ValueBits::ValueBit> RefCheckBit;
uint64_t CRCCheckIdx = CRC.BitReversed ? 0 : CRCSize - 1;
- ValueBits::ValueBit *CRCInputRefBit =
+ ValueBits::ValueBit CRCInputRefBit =
ValueBits::ValueBit::CreateRefBit(CRC.CRCInput, CRCCheckIdx);
if (CRC.DataInput) {
uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
- ValueBits::ValueBit *DataInputRefBit =
- ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+ ValueBits::ValueBit DataInputRefBit =
+ ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
RefCheckBit =
- ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+ ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
} else {
RefCheckBit = CRCInputRefBit;
}
- if (!RefCheckBit->equals(CheckBit)) {
+ if (!RefCheckBit.value().equals(CheckBit.value())) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
<< " CRCRecognize: Cannot verify check bit!\n"
<< *RefCheckBit << "\n"
@@ -3564,37 +3601,40 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
return false;
}
- ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
- ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
- if (CheckZero)
- std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
+ PValueBits CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
+ PValueBits CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
+ if (CheckZero) {
+ PValueBits Tmp = CRCOutBitsIfOne;
+ CRCOutBitsIfOne = CRCOutBitsIfZero;
+ CRCOutBitsIfZero = Tmp;
+ }
// Now construct ValueBits that would be the result of crc for one iteration.
// That is, a shift and then xor if [M/L]SB is 1.
- ValueBits *CRCValueBits = nullptr;
+ PValueBits CRCValueBits;
Result = ValueMap.find(CRC.CRCInput);
if (Result == ValueMap.end()) {
- CRCValueBits = new ValueBits(CRC.CRCInput, CRCSize);
+ CRCValueBits = std::make_shared<ValueBits>(CRC.CRCInput, CRCSize);
} else {
CRCValueBits = Result->second;
}
uint64_t GeneratorPolynomial =
CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
- ValueBits Polynomial(GeneratorPolynomial, CRCSize);
+ PValueBits Polynomial = std::make_shared<ValueBits>(GeneratorPolynomial, CRCSize);
// Case where the MSB/LSB of the data is 0
- ValueBits *IfZero = CRC.BitReversed ? ValueBits::LShr(CRCValueBits, 1)
- : ValueBits::Shl(CRCValueBits, 1);
+ PValueBits IfZero = CRC.BitReversed ? ValueBits::LShr(*CRCValueBits, 1)
+ : ValueBits::Shl(*CRCValueBits, 1);
// Case where the MSB/LSB of the data is 1
- ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
+ PValueBits IfOne = ValueBits::Xor(*IfZero, *Polynomial);
- if (!IfZero->equals(CRCOutBitsIfZero)) {
+ if (!IfZero->equals(CRCOutBitsIfZero.get())) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
<< *IfZero << *CRCOutBitsPred->getIfFalse());
return false;
}
- if (!IfOne->equals(CRCOutBitsIfOne)) {
+ if (!IfOne->equals(CRCOutBitsIfOne.get())) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
<< *IfOne << *CRCOutBitsPred->getIfTrue());
return false;
More information about the llvm-commits
mailing list