[llvm] [LoopIdiomRecognizer] Implement CRC recognition (PR #79295)

Joe Faulls via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 25 02:00:49 PDT 2024


https://github.com/joe-img updated https://github.com/llvm/llvm-project/pull/79295

>From 5626c07e9b0a419d2c6b13710a60dd2deccbf0bf Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:36:04 +0000
Subject: [PATCH 01/12] [LoopIdiomRecognize] Implement function to extract CRC
 data from loops

This will check to see if a loop looks like CRC, not necessarily
guaranteeing that it is CRC.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 244 +++++++++++++++++-
 1 file changed, 235 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c7e25c9f3d2c92..77030d5a772f95 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -135,6 +135,9 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
              "with -Os/-Oz"),
     cl::init(true), cl::Hidden);
 
+// Hidden command-line flag, off by default. runOnCountableLoop only calls
+// recognizeCRC when this flag is set.
+static cl::opt<bool> CRCRecognize("recognize-crc", cl::desc("CRC RECOGNIZE"),
+                                  cl::init(false), cl::Hidden);
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -186,6 +189,15 @@ class LoopIdiomRecognize {
             // handling.
   };
 
+  /// Facts that looksLikeCRC extracts from a candidate loop and hands to
+  /// recognizeCRC.
+  struct CRCInfo {
+    /// Value flowing into the tracked loop-carried PHI from outside the loop.
+    Value *CRCInput;
+    /// The single in-loop instruction that feeds an exit-block PHI.
+    Value *CRCOutput;
+    /// Additional loop input: a header-PHI incoming value from outside the
+    /// loop that is not CRCInput. Null when there is none.
+    Value *DataInput;
+    /// Filled in by looksLikeCRC with the loop trip count.
+    uint64_t Width;
+    /// Constant XOR operand found in the loop, bit-reversed over the width of
+    /// CRCInput when BitReversed is set.
+    uint64_t Polynomial;
+    /// True when the tracked shift is an lshr rather than a shl.
+    bool BitReversed;
+  };
+
   /// \name Countable Loop Idiom Handling
   /// @{
 
@@ -242,6 +254,8 @@ class LoopIdiomRecognize {
 
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
+  std::optional<CRCInfo> looksLikeCRC(const SCEV *BECount);
+  bool recognizeCRC(const SCEV *BECount);
 
   /// @}
 };
@@ -298,13 +312,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
   ApplyCodeSizeHeuristics =
       L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
 
-  HasMemset = TLI->has(LibFunc_memset);
-  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
-  HasMemcpy = TLI->has(LibFunc_memcpy);
-
-  if (HasMemset || HasMemsetPattern || HasMemcpy)
-    if (SE->hasLoopInvariantBackedgeTakenCount(L))
-      return runOnCountableLoop();
+  if (SE->hasLoopInvariantBackedgeTakenCount(L))
+    return runOnCountableLoop();
 
   return runOnNoncountableLoop();
 }
@@ -329,6 +338,17 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
                     << "] Countable Loop %" << CurLoop->getHeader()->getName()
                     << "\n");
 
+  bool MadeChange = false;
+  if (CRCRecognize)
+    MadeChange |= recognizeCRC(BECount);
+
+  HasMemset = TLI->has(LibFunc_memset);
+  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
+  HasMemcpy = TLI->has(LibFunc_memcpy);
+
+  if (!(HasMemset || HasMemsetPattern || HasMemcpy))
+    return MadeChange;
+
   // The following transforms hoist stores/memsets into the loop pre-header.
   // Give up if the loop has instructions that may throw.
   SimpleLoopSafetyInfo SafetyInfo;
@@ -336,8 +356,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
   if (SafetyInfo.anyBlockMayThrow())
     return false;
 
-  bool MadeChange = false;
-
   // Scan all the blocks in the loop that are not in subloops.
   for (auto *BB : CurLoop->getBlocks()) {
     // Ignore blocks in subloops.
@@ -2868,3 +2886,211 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
   ++NumShiftUntilZero;
   return MadeChange;
 }
+
+/// Mirror the low \p NumBits bits of \p Num, so that bit 0 ends up at bit
+/// NumBits-1 and vice versa. Bits of \p Num at or above \p NumBits are
+/// ignored.
+static uint64_t reverseBits(uint64_t Num, unsigned NumBits) {
+  uint64_t Reversed = 0;
+  // Fill destination positions from the top down while consuming Num from its
+  // least-significant end.
+  for (unsigned Dest = NumBits; Dest-- > 0; Num >>= 1)
+    if (Num & 1)
+      Reversed |= uint64_t(1) << Dest;
+  return Reversed;
+}
+
+/// Entry point for CRC recognition on the current loop. In this form it only
+/// runs looksLikeCRC and dumps what was extracted under LLVM_DEBUG; it never
+/// modifies the IR and always returns false.
+bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
+  // Step one: Check if the loop looks like crc, and extract some useful
+  // information for us to check
+  std::optional<CRCInfo> MaybeCRC = looksLikeCRC(BECount);
+  if (!MaybeCRC)
+    return false;
+  CRCInfo CRC = *MaybeCRC;
+
+  // Bit width of the type of the CRC input value (reported as "CRC Size").
+  uint64_t CRCSize = CRC.CRCInput->getType()->getScalarSizeInBits();
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Found potential CRCLoop "
+                    << *CurLoop << "\n"
+                    << "Input CRC: " << *CRC.CRCInput << "\n"
+                    << "Output CRC: " << *CRC.CRCOutput << "\n"
+                    << "GeneratorPolynomial: " << CRC.Polynomial << "\n"
+                    << "CRC Size: " << CRCSize << "\n"
+                    << "CRC Width: " << CRC.Width << "\n"
+                    << "Reversed: " << CRC.BitReversed << "\n");
+  // DataInput is optional; it is null when the loop has no second input.
+  if (CRC.DataInput) {
+    LLVM_DEBUG(dbgs() << "Data Input: " << *CRC.DataInput << "\n"
+                      << "Data Size: "
+                      << CRC.DataInput->getType()->getScalarSizeInBits()
+                      << "\n");
+  }
+
+  return false;
+}
+
+/// Cheap structural filter for CRC-like loops. This does not prove the loop
+/// computes a CRC; it only rejects loops that clearly do not and collects the
+/// pieces a later check needs.
+///
+/// \param BECount backedge-taken count of the current loop.
+/// \returns std::nullopt when the loop does not have the expected shape,
+///          otherwise a CRCInfo holding the loop input(s), the loop output,
+///          the trip count, the constant XOR operand and the shift direction.
+std::optional<LoopIdiomRecognize::CRCInfo>
+LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
+  // Initial checks to see if this loop looks like CRC:
+  // - Inner most loop
+  // - One block
+  // - One exit
+  // - Iteration count is 8
+
+  // Check if this is inner most loop
+  if (!CurLoop->isInnermost())
+    return std::nullopt;
+
+  // Since we are far enough in the optimization pipeline that small branches
+  // will have been folded into Select instructions, if we have branches we are
+  // unlikely to be CRC. To reduce complexity, only consider single-block loops
+  // for CRC recognition
+  if (CurLoop->getBlocks().size() > 1) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Loops with more than one"
+                      << " block are unsupported\n");
+    return std::nullopt;
+  }
+
+  // Ensure one exit block
+  const BasicBlock *ExitBlock = CurLoop->getExitBlock();
+  if (!ExitBlock)
+    return std::nullopt;
+
+  // Check iteration count is 8
+  const SCEV *TripCountSCEV =
+      SE->getTripCountFromExitCount(BECount, BECount->getType(), CurLoop);
+  const SCEVConstant *TripCountSCEVConst =
+      dyn_cast<SCEVConstant>(TripCountSCEV);
+  if (!TripCountSCEVConst)
+    return std::nullopt;
+  APInt TripCount = TripCountSCEVConst->getAPInt();
+  // Only support one byte CRC loops. Loops with tripcount 16 or 32 can also be
+  // CRC, but this is currently unsupported
+  if (TripCount != 8)
+    return std::nullopt;
+
+  // Ensure only one value that is live across the loop boundary, and track the
+  // operations on this value. This should include:
+  // 1) A phi with an initial value outside the loop
+  // 2) Shift operation
+  // 3) ICMP operation
+
+  // Ensure only one value is live across the loop boundary. LCSSA ensures any
+  // live values are captured in a PHI of the exit block.
+  Instruction *LoopOutput = nullptr;
+  for (const PHINode &ExitPhi : ExitBlock->phis()) {
+    for (const Use &IncomingUse : ExitPhi.incoming_values()) {
+      // Look at the value the use refers to, the same way AddAllInstOps does.
+      Instruction *IncomingUser = dyn_cast<Instruction>(IncomingUse.get());
+      if (!IncomingUser)
+        continue;
+      if (CurLoop->contains(IncomingUser)) {
+        if (LoopOutput)
+          return std::nullopt;
+        LoopOutput = IncomingUser;
+      }
+    }
+  }
+
+  if (!LoopOutput)
+    return std::nullopt;
+
+  // Queue every operand of I that is itself an instruction.
+  auto AddAllInstOps = [](Instruction *I,
+                          SmallVectorImpl<Instruction *> &Worklist) {
+    for (Use &Op : I->operands()) {
+      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
+      if (OpInst)
+        Worklist.push_back(OpInst);
+    }
+  };
+
+  // Follow this value in the loop: walk backwards from the loop output through
+  // selects, shifts, xors and phis, remembering what was seen on the way.
+  SmallVector<Instruction *, 4> Worklist;
+  SmallPtrSet<Instruction *, 4> Visited;
+  bool FoundIcmp = false;
+  BinaryOperator *CRCShift = nullptr;
+  ConstantInt *GeneratorPolynomial = nullptr;
+  Value *CRCInput = nullptr;
+  Worklist.push_back(LoopOutput);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (Visited.contains(I))
+      continue;
+    Visited.insert(I);
+    if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+      FoundIcmp |= isa<ICmpInst>(Select->getCondition());
+      AddAllInstOps(Select, Worklist);
+    } else if (isa<ICmpInst>(I)) {
+      // Instead of tracking the condition and working out if it's based on
+      // MSB of crc/data, just greedily assume it will be and check later.
+      FoundIcmp = true;
+    } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I)) {
+      switch (BinOp->getOpcode()) {
+      default:
+        break;
+      case Instruction::Shl:
+      case Instruction::LShr: {
+        // This shift could be the data or the crc. Either way, the RHS should
+        // be constant one.
+        Instruction *ShLHS = dyn_cast<Instruction>(BinOp->getOperand(0));
+        ConstantInt *ShRHS = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+        if (!ShRHS || !ShLHS || ShRHS->getZExtValue() != 1)
+          return std::nullopt;
+        CRCShift = BinOp;
+        Worklist.push_back(ShLHS);
+        break;
+      }
+      case Instruction::Xor: {
+        // A constant on the right-hand side of an xor is taken to be the
+        // generator polynomial.
+        Value *XorRHS = BinOp->getOperand(1);
+        if (ConstantInt *RHSConst = dyn_cast<ConstantInt>(XorRHS))
+          GeneratorPolynomial = RHSConst;
+        AddAllInstOps(BinOp, Worklist);
+        break;
+      }
+      }
+    } else if (PHINode *PHI = dyn_cast<PHINode>(I)) {
+      for (BasicBlock *IncomingBlock : PHI->blocks()) {
+        Value *IncomingValue = PHI->getIncomingValueForBlock(IncomingBlock);
+
+        if (CurLoop->contains(IncomingBlock)) {
+          if (Instruction *IncomingI = dyn_cast<Instruction>(IncomingValue)) {
+            Worklist.push_back(IncomingI);
+          }
+        } else {
+          // Value entering the recurrence from outside the loop.
+          CRCInput = IncomingValue;
+        }
+      }
+    }
+  }
+
+  // All of the pieces listed above must have been seen, including the icmp.
+  if (!(CRCShift && GeneratorPolynomial && CRCInput && FoundIcmp)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Does not look like CRC\n");
+    return std::nullopt;
+  }
+
+  // The polynomial is carried in a uint64_t and reverseBits shifts a uint64_t,
+  // so wider values cannot be represented here.
+  unsigned CRCInputBits = CRCInput->getType()->getScalarSizeInBits();
+  if (CRCInputBits > 64 || GeneratorPolynomial->getBitWidth() > 64)
+    return std::nullopt;
+
+  // The crc loop will have either one or two inputs depending on whether the
+  // data is xor'd inside or outside the loop. Assume any additional inputs that
+  // isn't our crc input is the data.
+  Value *DataInput = nullptr;
+  PHINode *InductionPhi = CurLoop->getInductionVariable(*SE);
+  for (const PHINode &EntryPhi : CurLoop->getHeader()->phis()) {
+    if (&EntryPhi == InductionPhi)
+      continue;
+    for (BasicBlock *BB : EntryPhi.blocks()) {
+      if (!CurLoop->contains(BB)) {
+        Value *IncomingVal = EntryPhi.getIncomingValueForBlock(BB);
+        if (IncomingVal != CRCInput) {
+          // Only allow exactly one additional input to the loop.
+          if (DataInput)
+            return std::nullopt;
+          DataInput = IncomingVal;
+        }
+      }
+    }
+  }
+
+  // A right shift means the CRC is processed least-significant bit first, in
+  // which case the polynomial constant is stored bit-reversed in the IR.
+  bool Reversed = CRCShift->getOpcode() == Instruction::LShr;
+
+  uint64_t Polynomial = GeneratorPolynomial->getZExtValue();
+  if (Reversed)
+    Polynomial = reverseBits(Polynomial, CRCInputBits);
+
+  CRCInfo CRC = {CRCInput,   LoopOutput, DataInput, TripCount.getZExtValue(),
+                 Polynomial, Reversed};
+
+  return std::optional<CRCInfo>{CRC};
+}

>From cecb3bcf72650e918270034f43303b36f8168a61 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:59:09 +0000
Subject: [PATCH 02/12] [LoopIdiomRecognize] Implement ValueBits class

This is a representation of a value's bits in terms of references to
other values' bits, or 1/0 if the bit is known. This allows symbolic
execution of bitwise instructions without knowing the exact values.

Example:

LLVM IR Value i8 %x:
[%x[7], %x[6], %x[5], %x[4], %x[3], %x[2], %x[1], %x[0]]

%shr = lshr i8 %x, 2
[ 0, 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2]]

%shl = shl i8 %shr, 1
[ 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2], 0]

%xor = xor i8 %shl, 0xb
[ 0, %x[7], %x[6], %x[5], %x[4]^1, %x[3], %x[2]^1, 1]
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 236 ++++++++++++++++++
 1 file changed, 236 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 77030d5a772f95..6b8c080be1ca87 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -2896,6 +2896,242 @@ static uint64_t reverseBits(uint64_t Num, unsigned NumBits) {
   return Reversed;
 }
 
+class ValueBits {
+  // This is a representation of a value's bits in terms of references to
+  // other values' bits, or 1/0 if the bit is known. This allows symbolic
+  // execution of bitwise instructions without knowing the exact values.
+  //
+  // Example:
+  //
+  // LLVM IR Value i8 %x:
+  // [%x[7], %x[6], %x[5], %x[4], %x[3], %x[2], %x[1], %x[0]]
+  //
+  // %shr = lshr i8 %x, 2
+  // [ 0, 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2]]
+  //
+  // %shl = shl i8 %shr, 1
+  // [ 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2], 0]
+  //
+  // %xor = xor i8 %shl, 0xb
+  // [ 0, %x[7], %x[6], %x[5], %x[4]^1, %x[3], %x[2]^1, 1]
+  //
+  // Bits are stored least-significant first: Bits[0] is bit 0 of the value.
+  //
+  // NOTE(review): ValueBit and ValueBits objects are created with plain new
+  // and nothing in this class frees them; copies made by copyBits share their
+  // ValueBit pointers with the original.
+public:
+  // One symbolic bit: a known constant, a reference to a bit of an IR value,
+  // or the XOR of two other symbolic bits.
+  class ValueBit {
+  public:
+    enum BitType { ONE, ZERO, REF, XOR };
+
+  private:
+    BitType _Type;
+    // (value, bit index) referred to; only meaningful for REF bits.
+    std::pair<Value *, uint64_t> _BitRef;
+    // Operands; only set for XOR bits.
+    ValueBit *_LHS = nullptr;
+    ValueBit *_RHS = nullptr;
+
+    ValueBit(BitType Type) : _Type(Type) {}
+    ValueBit(BitType Type, std::pair<Value *, uint64_t> BitRef)
+        : _Type(Type), _BitRef(BitRef) {}
+    ValueBit(BitType Type, ValueBit *LHS, ValueBit *RHS)
+        : _Type(Type), _LHS(LHS), _RHS(RHS) {}
+
+  public:
+    static ValueBit *CreateOneBit() { return new ValueBit(BitType::ONE); }
+    static ValueBit *CreateZeroBit() { return new ValueBit(BitType::ZERO); }
+    static ValueBit *CreateRefBit(Value *Ref, uint64_t Offset) {
+      return new ValueBit(BitType::REF, std::make_pair(Ref, Offset));
+    }
+    static ValueBit *CreateXORBit(ValueBit *LHS, ValueBit *RHS) {
+      return new ValueBit(BitType::XOR, LHS, RHS);
+    }
+    inline BitType getType() { return _Type; }
+
+    // Structural equality. XOR bits compare equal with their operands in
+    // either order; no other algebraic simplification is attempted.
+    bool equals(ValueBit *RHS) {
+      if (_Type != RHS->getType())
+        return false;
+      switch (_Type) {
+      case BitType::ONE:
+      case BitType::ZERO:
+        return true;
+      case BitType::REF:
+        return _BitRef == RHS->_BitRef;
+      case BitType::XOR:
+        return (_LHS->equals(RHS->_LHS) && _RHS->equals(RHS->_RHS)) ||
+               (_LHS->equals(RHS->_RHS) && _RHS->equals(RHS->_LHS));
+      }
+      return false;
+    }
+
+    void print(raw_ostream &OS) {
+      switch (_Type) {
+      case BitType::ONE:
+        OS << "1";
+        break;
+      case BitType::ZERO:
+        OS << "0";
+        break;
+      case BitType::REF:
+        OS << _BitRef.first->getNameOrAsOperand() << "[" << _BitRef.second
+           << "]";
+        break;
+      case BitType::XOR:
+        _LHS->print(OS);
+        OS << "^";
+        _RHS->print(OS);
+        break;
+      }
+    }
+  };
+
+private:
+  // Number of bits. Zero for objects built through the protected default
+  // constructor.
+  uint64_t Size = 0;
+  std::vector<ValueBit *> Bits;
+
+  // In-place primitives. The public static wrappers below apply them to a
+  // copy, and subclasses may override them.
+
+  virtual void _Shl(uint64_t N) {
+    // After Size single-bit shifts every bit is zero, so larger amounts give
+    // the same result; clamping keeps the loop bounded.
+    for (N = std::min(N, Size); N > 0; N--) {
+      Bits.insert(Bits.begin(), ValueBit::CreateZeroBit());
+      Bits.pop_back();
+    }
+  }
+  virtual void _LShr(uint64_t N) {
+    // Clamped for the same reason as in _Shl.
+    for (N = std::min(N, Size); N > 0; N--) {
+      Bits.insert(Bits.end(), ValueBit::CreateZeroBit());
+      Bits.erase(Bits.begin());
+    }
+  }
+  virtual void _Xor(ValueBits *RHS) {
+    assert(Size == RHS->getSize());
+    for (unsigned I = 0; I < Size; I++) {
+      // Read the current bit before replacing it, and replace it by index so
+      // no iterator is used after the vector has been modified.
+      ValueBit *LHSBit = Bits[I];
+      ValueBit *RHSBit = RHS->getBit(I);
+      const bool LHSIsZero = LHSBit->getType() == ValueBit::BitType::ZERO;
+      const bool LHSIsOne = LHSBit->getType() == ValueBit::BitType::ONE;
+      switch (RHSBit->getType()) {
+      case ValueBit::BitType::ZERO:
+        // x ^ 0 == x: nothing to do.
+        break;
+      case ValueBit::BitType::ONE:
+        // Fold constants; otherwise record the inversion symbolically.
+        if (LHSIsZero)
+          Bits[I] = ValueBit::CreateOneBit();
+        else if (LHSIsOne)
+          Bits[I] = ValueBit::CreateZeroBit();
+        else
+          Bits[I] = ValueBit::CreateXORBit(LHSBit, ValueBit::CreateOneBit());
+        break;
+      case ValueBit::BitType::REF:
+      case ValueBit::BitType::XOR:
+        // 0 ^ y == y; anything else becomes a symbolic XOR node.
+        if (LHSIsZero)
+          Bits[I] = new ValueBit(*RHSBit);
+        else
+          Bits[I] = ValueBit::CreateXORBit(LHSBit, RHSBit);
+        break;
+      }
+    }
+  }
+  virtual void _ZExt(uint64_t ToSize) {
+    assert(ToSize > Size);
+    for (uint64_t I = 0; I < ToSize - Size; I++)
+      Bits.push_back(ValueBit::CreateZeroBit());
+    Size = ToSize;
+  }
+  virtual void _Trunc(uint64_t ToSize) {
+    assert(ToSize < Size);
+    Bits.erase(Bits.begin() + ToSize, Bits.end());
+    Size = ToSize;
+  }
+  virtual void _And(uint64_t RHS) {
+    // A zero mask bit forces the result bit to zero; a one leaves it alone.
+    for (unsigned I = 0; I < Size; I++) {
+      if (!(RHS & 1))
+        Bits[I] = ValueBit::CreateZeroBit();
+      RHS >>= 1;
+    }
+  }
+
+protected:
+  ValueBits() {}
+
+public:
+  ValueBits(const ValueBits &) = default;
+  virtual ~ValueBits() = default;
+
+  // Every bit refers to the corresponding bit of InitialVal.
+  ValueBits(Value *InitialVal, uint64_t BitLength) : Size(BitLength) {
+    for (unsigned i = 0; i < BitLength; i++)
+      Bits.push_back(ValueBit::CreateRefBit(InitialVal, i));
+  }
+  // Every bit is the known constant taken from the low BitLength bits of
+  // InitialVal.
+  ValueBits(uint64_t InitialVal, uint64_t BitLength) : Size(BitLength) {
+    for (unsigned i = 0; i < BitLength; i++) {
+      if (InitialVal & 0x1)
+        Bits.push_back(ValueBit::CreateOneBit());
+      else
+        Bits.push_back(ValueBit::CreateZeroBit());
+      InitialVal >>= 1;
+    }
+  }
+  uint64_t getSize() { return Size; }
+  ValueBit *getBit(unsigned i) { return Bits[i]; }
+
+  // Shallow copy: the new object has its own vector but points at the same
+  // ValueBit objects.
+  virtual ValueBits *copyBits() { return new ValueBits(*this); }
+
+  // Each operation below returns a newly allocated result and leaves its
+  // operands untouched.
+  static ValueBits *Shl(ValueBits *LHS, uint64_t N) {
+    ValueBits *Shifted = LHS->copyBits();
+    Shifted->_Shl(N);
+    return Shifted;
+  }
+  static ValueBits *LShr(ValueBits *LHS, uint64_t N) {
+    ValueBits *Shifted = LHS->copyBits();
+    Shifted->_LShr(N);
+    return Shifted;
+  }
+  static ValueBits *Xor(ValueBits *LHS, ValueBits *RHS) {
+    ValueBits *Xord = LHS->copyBits();
+    Xord->_Xor(RHS);
+    return Xord;
+  }
+  static ValueBits *ZExt(ValueBits *LHS, uint64_t ToSize) {
+    ValueBits *Zexted = LHS->copyBits();
+    Zexted->_ZExt(ToSize);
+    return Zexted;
+  }
+  static ValueBits *Trunc(ValueBits *LHS, uint64_t N) {
+    ValueBits *Trunced = LHS->copyBits();
+    Trunced->_Trunc(N);
+    return Trunced;
+  }
+  static ValueBits *And(ValueBits *LHS, uint64_t RHS) {
+    ValueBits *Anded = LHS->copyBits();
+    Anded->_And(RHS);
+    return Anded;
+  }
+
+  virtual bool isPredicated() { return false; }
+
+  // True when both have the same size and every pair of bits is equal
+  // according to ValueBit::equals.
+  virtual bool equals(ValueBits *RHS) {
+    if (Size != RHS->getSize())
+      return false;
+
+    for (unsigned I = 0; I < Size; I++)
+      if (!getBit(I)->equals(RHS->getBit(I)))
+        return false;
+
+    return true;
+  }
+
+  // Prints the bits most-significant first, separated by " | ".
+  virtual void print(raw_ostream &OS) {
+    assert(Size != 0);
+    OS << "[";
+    Bits[Size - 1]->print(OS);
+    for (int i = Size - 2; i >= 0; i--) {
+      OS << " | ";
+      Bits[i]->print(OS);
+    }
+    OS << "]\n";
+  }
+};
+
+// Stream a ValueBits by forwarding to its virtual print method.
+inline raw_ostream &operator<<(raw_ostream &OS, ValueBits &VBS) {
+  VBS.print(OS);
+  return OS;
+}
+
+// Stream a single ValueBit by forwarding to its print method.
+inline raw_ostream &operator<<(raw_ostream &OS, ValueBits::ValueBit &VB) {
+  VB.print(OS);
+  return OS;
+}
+
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful
   // information for us to check

>From cd8aaf7d5f34dc3a980a075a28fb8167a58885f3 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:59:36 +0000
Subject: [PATCH 03/12] [LoopIdiomRecognize] Implement PredicatedValueBits

These would be representative of select or phi instructions where the
bits would depend on an icmp.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 6b8c080be1ca87..184d008c61b58b 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3131,6 +3131,56 @@ inline raw_ostream &operator<<(raw_ostream &OS, ValueBits::ValueBit &VB) {
   VB.print(OS);
   return OS;
 }
+class PredicatedValueBits : public ValueBits {
+  // Represents a value whose bits depend on an icmp, as for select or phi
+  // instructions: one ValueBits for the predicate being true and one for it
+  // being false. Every bitwise operation is applied to both alternatives.
+private:
+  ICmpInst *_Predicate;
+  ValueBits *_IfTrue;
+  ValueBits *_IfFalse;
+
+  // Replace each alternative (true first, then false) with Op applied to it.
+  template <typename OpFn> void applyToBoth(OpFn Op) {
+    _IfTrue = Op(_IfTrue);
+    _IfFalse = Op(_IfFalse);
+  }
+
+  void _Shl(uint64_t N) override {
+    applyToBoth([N](ValueBits *VB) { return ValueBits::Shl(VB, N); });
+  }
+  void _LShr(uint64_t N) override {
+    applyToBoth([N](ValueBits *VB) { return ValueBits::LShr(VB, N); });
+  }
+  void _ZExt(uint64_t N) override {
+    applyToBoth([N](ValueBits *VB) { return ValueBits::ZExt(VB, N); });
+  }
+  void _And(uint64_t N) override {
+    applyToBoth([N](ValueBits *VB) { return ValueBits::And(VB, N); });
+  }
+  void _Xor(ValueBits *RHS) override {
+    applyToBoth([RHS](ValueBits *VB) { return ValueBits::Xor(VB, RHS); });
+  }
+  void _Trunc(uint64_t N) override {
+    applyToBoth([N](ValueBits *VB) { return ValueBits::Trunc(VB, N); });
+  }
+
+public:
+  PredicatedValueBits(ICmpInst *Predicate, ValueBits *IfTrue,
+                      ValueBits *IfFalse)
+      : _Predicate(Predicate), _IfTrue(IfTrue), _IfFalse(IfFalse) {}
+
+  // Copies the predicate and the two alternative pointers, not the
+  // alternatives themselves.
+  ValueBits *copyBits() override { return new PredicatedValueBits(*this); }
+  bool isPredicated() override { return true; }
+  ICmpInst *getPredicate() { return _Predicate; }
+  ValueBits *getIfTrue() { return _IfTrue; }
+  ValueBits *getIfFalse() { return _IfFalse; }
+
+  // Prints the predicate followed by both alternatives.
+  void print(raw_ostream &OS) override {
+    OS << "Predicate: " << *_Predicate << "\nIf True:\n";
+    _IfTrue->print(OS);
+    OS << "If False:\n";
+    _IfFalse->print(OS);
+  }
+};
 
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful

>From 180c3980a9c71c5754c00947d6d253876908c8be Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:11:59 +0000
Subject: [PATCH 04/12] [LoopIdiomRecognize] Add function to symbolically
 execute basic block

The result is a map between llvm Values and their bit representations as
ValueBits.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 144 ++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 184d008c61b58b..c5cb54e93e93b8 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -90,6 +90,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <map>
 #include <utility>
 #include <vector>
 
@@ -3182,6 +3183,119 @@ class PredicatedValueBits : public ValueBits {
   }
 };
 
+// Execute the instructions in a basic block whilst mapping out Values to
+// ValueBits.
+//
+// Each handled instruction (phi, shl, lshr, and, xor, zext, trunc, select)
+// gets an entry in ValueMap describing its result bit by bit. Operands that
+// have no entry are modelled on the fly, as known bits for a ConstantInt and
+// as references to their own bits otherwise; those temporaries are not added
+// to ValueMap. Unhandled instructions are skipped.
+//
+// Returns false as soon as an instruction cannot be modelled: a non-constant
+// shift amount or mask, a shift amount not smaller than the bit width, a
+// constant wider than 64 bits, a phi with more than one incoming block other
+// than BB, or a select whose condition is not an icmp.
+//
+// NOTE(review): the ValueBits created here are heap-allocated and never
+// freed by this function.
+static bool symbolicallyExecute(BasicBlock *BB,
+                                std::map<Value *, ValueBits *> &ValueMap) {
+
+  auto getConstantOperand = [](Instruction *I, uint8_t Operand) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
+    if (!CI) {
+      // Widen the index so it is printed as a number, not as a character.
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                        << " handle this operation with non-constant operand "
+                        << unsigned(Operand) << ":\n"
+                        << *I << "\n");
+    }
+    return CI;
+  };
+
+  // Look Val up in ValueMap, or build a fresh description of it. Returns
+  // null for a constant that does not fit in the 64-bit ValueBits
+  // constructor.
+  auto getOrCreateValueBits = [&ValueMap](Value *Val) -> ValueBits * {
+    auto Result = ValueMap.find(Val);
+    if (Result != ValueMap.end())
+      return Result->second;
+    uint64_t Width = Val->getType()->getScalarSizeInBits();
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+      if (CI->getBitWidth() > 64) {
+        LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                          << " handle constants wider than 64 bits: " << *Val
+                          << "\n");
+        return nullptr;
+      }
+      // Only the low Width bits are consumed.
+      return new ValueBits(CI->getZExtValue(), Width);
+    }
+    return new ValueBits(Val, Width);
+  };
+
+  for (Instruction &I : *BB) {
+    uint64_t BitSize = I.getType()->getScalarSizeInBits();
+    switch (I.getOpcode()) {
+    case Instruction::PHI: {
+      PHINode *PHI = cast<PHINode>(&I);
+      // Model the phi by its value on entry, i.e. the single incoming block
+      // that is not BB itself.
+      const BasicBlock *IncomingBlock = nullptr;
+      for (const BasicBlock *Incoming : PHI->blocks()) {
+        if (Incoming != BB) {
+          if (IncomingBlock) {
+            LLVM_DEBUG(dbgs()
+                       << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                       << " handle loop with multiple entries" << I << "\n");
+            return false;
+          }
+          IncomingBlock = Incoming;
+        }
+      }
+      assert(IncomingBlock);
+      ValueBits *EntryBits =
+          getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock));
+      if (!EntryBits)
+        return false;
+      ValueMap[&I] = EntryBits;
+    } break;
+    case Instruction::Shl:
+    case Instruction::LShr: {
+      ConstantInt *CI = getConstantOperand(&I, 1);
+      if (!CI)
+        return false;
+      // A shift by the bit width or more yields poison in IR; refuse to
+      // model it. This also guarantees the amount fits in a uint64_t.
+      if (CI->getValue().uge(BitSize))
+        return false;
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      if (!LHSBits)
+        return false;
+      uint64_t Amount = CI->getZExtValue();
+      ValueMap[&I] = I.getOpcode() == Instruction::Shl
+                         ? ValueBits::Shl(LHSBits, Amount)
+                         : ValueBits::LShr(LHSBits, Amount);
+    } break;
+    case Instruction::And: {
+      ConstantInt *CI = getConstantOperand(&I, 1);
+      // The mask is passed as a uint64_t, so it must fit in 64 bits.
+      if (!CI || CI->getBitWidth() > 64)
+        return false;
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      if (!LHSBits)
+        return false;
+      ValueMap[&I] = ValueBits::And(LHSBits, CI->getZExtValue());
+    } break;
+    case Instruction::Xor: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueBits *RHSBits = getOrCreateValueBits(I.getOperand(1));
+      if (!LHSBits || !RHSBits)
+        return false;
+      ValueMap[&I] = ValueBits::Xor(LHSBits, RHSBits);
+    } break;
+    case Instruction::ZExt: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      if (!LHSBits)
+        return false;
+      ValueMap[&I] = ValueBits::ZExt(LHSBits, BitSize);
+    } break;
+    case Instruction::Trunc: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      if (!LHSBits)
+        return false;
+      ValueMap[&I] = ValueBits::Trunc(LHSBits, BitSize);
+    } break;
+    case Instruction::Select: {
+      SelectInst *Select = cast<SelectInst>(&I);
+      ICmpInst *Cond = dyn_cast<ICmpInst>(Select->getCondition());
+      if (!Cond) {
+        LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                          << " handle SelectInst with non-icmp condition: " << I
+                          << "\n");
+        return false;
+      }
+      ValueBits *IfTrue = getOrCreateValueBits(Select->getTrueValue());
+      ValueBits *IfFalse = getOrCreateValueBits(Select->getFalseValue());
+      if (!IfTrue || !IfFalse)
+        return false;
+      ValueMap[&I] = new PredicatedValueBits(Cond, IfTrue, IfFalse);
+    } break;
+    default:
+      // If this instruction is not recognized, then just continue. This is
+      // okay because users of this will just reference it by value, which is
+      // conservative.
+      break;
+    }
+  }
+  return true;
+}
+
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful
   // information for us to check
@@ -3206,6 +3320,36 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
                       << "\n");
   }
 
+  // Symbolically execute one iteration of the loop to populate a map of
+  // Value's to their ValueBits, aka a representation of their bits in terms of
+  // 1's, 0's and references to other values' bits. If these match pre-computed
+  // crc values, then we can say it's doing crc.
+  std::map<Value *, ValueBits *> ValueMap;
+
+  if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
+    return false;
+
+  auto Result = ValueMap.find(CRC.CRCOutput);
+  if (Result == ValueMap.end()) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Did not find CRC output"
+                      << " after symbolic execution\n");
+    return false;
+  }
+
+  ValueBits *CRCOutBits = Result->second;
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                    << " CRCRegonize: ValueBits for output crc value:\n"
+                    << *CRCOutBits);
+
+  // Check this value is predicated
+  if (!CRCOutBits->isPredicated()) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Output CRC ValueBits is not"
+                      << " predicated.\n");
+    return false;
+  }
+  PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
+
   return false;
 }
 

>From 8939111aa7f6e250c4ca855f0d3d33394b0c0328 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:15:44 +0000
Subject: [PATCH 05/12] [LoopIdiomRecognize] Check result of symbolic execution
 matches CRC

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 151 ++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c5cb54e93e93b8..3c4741edcfc73c 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3350,6 +3350,157 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   }
   PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
 
+  // Need to check if the predicate is checking the MSB/LSB depending on
+  // whether this is bit reversed CRC
+  ICmpInst *ICmp = CRCOutBitsPred->getPredicate();
+  CmpInst::Predicate Pred = ICmp->getPredicate();
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize checking to see if " << *ICmp
+                    << " is checking the "
+                    << (CRC.BitReversed ? "LSB\n" : "MSB\n"));
+
+  // Firstly check the LHS is in our map, and RHS is a constant
+  ConstantInt *RHS = dyn_cast<ConstantInt>(ICmp->getOperand(1));
+  Result = ValueMap.find(ICmp->getOperand(0));
+  if (!RHS || (Result == ValueMap.end())) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Cannot determine ICmp operands\n");
+    return false;
+  }
+  ValueBits *ICmpOp0Bits = Result->second;
+
+  // Now match the following cases
+  // (LSB): icmp [ne/eq] %mcrc, [1/0], where mcrc has LSB masked out
+  // (MSB): icmp [ne/eq] %mcrc, [1 << BitSize], where mcrc has MSB masked out
+  // (MSB): icmp [sgt/sge] %crc, [1/0]
+  // (MSB): icmp [slt/sle] %crc, [0/-1]
+  // And decide whether the check is checking for existence of 1 or 0
+  bool checkZero = false;
+  ValueBits::ValueBit *CheckBit = nullptr;
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_EQ: {
+    // Check RHS is checking only one bit.
+    uint64_t RHSNum = RHS->getZExtValue();
+    uint64_t MSBNum = 1 << (ICmpOp0Bits->getSize() - 1);
+    // LSB if BitReversed, MSB otherwise.
+    if (!(CRC.BitReversed && RHSNum == 1) &&
+        !(!CRC.BitReversed && RHSNum == MSBNum) && RHSNum != 0) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                        << " CRCRegonize: ICmp RHS is not checking [M/L]SB\n");
+      return false;
+    }
+    // Now to check if we already know all the other bits of the RHS are zero.
+    ValueBits AllZeroValueBits((uint64_t)0, ICmpOp0Bits->getSize());
+    ValueBits *CRCOutBitsMasked = nullptr;
+    if (CRC.BitReversed) {
+      // Masking out the LSB is equivalent to shifting right one if we're just
+      // comparing all the other bits are zero.
+      CRCOutBitsMasked = ValueBits::LShr(ICmpOp0Bits, 1);
+      CheckBit = ICmpOp0Bits->getBit(0);
+    } else {
+      // The CRC type might be larger than the data, so we can't shift left
+      // one. Mask instead.
+      uint64_t MSBMask = ~(1 << (CRC.Width - 1));
+      CRCOutBitsMasked = ValueBits::And(ICmpOp0Bits, MSBMask);
+      CheckBit = ICmpOp0Bits->getBit(CRC.Width - 1);
+    }
+    if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
+      LLVM_DEBUG(
+          dbgs() << DEBUG_TYPE
+                 << " CRCRegonize: Cannot determine ICmp checks [M/L]SB\n");
+      return false;
+    }
+    checkZero = RHSNum == 0;
+    break;
+  }
+  case CmpInst::ICMP_SGT:
+  case CmpInst::ICMP_SGE:
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    checkZero = true;
+    [[fallthrough]];
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE: {
+    int64_t RHSNum = RHS->getSExtValue();
+    if (((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SGE) &&
+         RHSNum != 0) ||
+        ((Pred == CmpInst::ICMP_SLE) && RHSNum != -1) ||
+        ((Pred == CmpInst::ICMP_SGT) && RHSNum != 1) ||
+        ((Pred == CmpInst::ICMP_ULT) && RHSNum != (1 << (CRC.Width - 1))) ||
+        ((Pred == CmpInst::ICMP_ULE) && RHSNum != (1 << (CRC.Width - 1)) - 1)) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                        << " CRCRegonize: ICmp RHS is not checking MSB\n");
+      return false;
+    }
+    CheckBit = ICmpOp0Bits->getBit(CRCSize - 1);
+    break;
+  }
+  default:
+    return false;
+  }
+
+  // If there exists a Data input, ensure the check bit is crc^data.
+  ValueBits::ValueBit *RefCheckBit = nullptr;
+  uint64_t CRCCheckIdx = CRC.BitReversed ? 0 : CRCSize - 1;
+  ValueBits::ValueBit *CRCInputRefBit =
+      ValueBits::ValueBit::CreateRefBit(CRC.CRCInput, CRCCheckIdx);
+  if (CRC.DataInput) {
+    uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
+    uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
+    ValueBits::ValueBit *DataInputRefBit =
+        ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+    RefCheckBit =
+        ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+  } else {
+    RefCheckBit = CRCInputRefBit;
+  }
+
+  if (!RefCheckBit->equals(CheckBit)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Cannot verify check bit!\n"
+                      << *RefCheckBit << "\n"
+                      << *CheckBit << "\n");
+    return false;
+  }
+
+  ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
+  ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
+  if (checkZero)
+    std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
+
+  // Now construct ValueBits that would be the result of crc for one iteration.
+  // That is, a shift and then xor if [M/L]SB is 1.
+  ValueBits *CRCValueBits = nullptr;
+  Result = ValueMap.find(CRC.CRCInput);
+  if (Result == ValueMap.end()) {
+    CRCValueBits = new ValueBits(CRC.CRCInput, CRCSize);
+  } else {
+    CRCValueBits = Result->second;
+  }
+  uint64_t GeneratorPolynomial =
+      CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
+  ValueBits Polynomial(GeneratorPolynomial, CRCSize);
+
+  // Case where the MSB/LSB of the data is 0
+  ValueBits *IfZero = CRC.BitReversed ? ValueBits::LShr(CRCValueBits, 1)
+                                      : ValueBits::Shl(CRCValueBits, 1);
+
+  // Case where the MSB/LSB of the data is 1
+  ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
+
+  if (!IfZero->equals(CRCOutBitsIfZero)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+                      << *IfZero << *CRCOutBitsPred->getIfFalse());
+    return false;
+  }
+  if (!IfOne->equals(CRCOutBitsIfOne)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+                      << *IfOne << *CRCOutBitsPred->getIfTrue());
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
+
   return false;
 }
 

>From bb3a6c7022611e78c67ece23bb527ff4a7f65583 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:17:13 +0000
Subject: [PATCH 06/12] [LoopIdiomRecognize] Write lookup table based CRC for
 one-byte data

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 96 ++++++++++++++++++-
 1 file changed, 95 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3c4741edcfc73c..d9c18cd9696f04 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -91,6 +91,7 @@
 #include <cassert>
 #include <cstdint>
 #include <map>
+#include <sstream>
 #include <utility>
 #include <vector>
 
@@ -257,6 +258,7 @@ class LoopIdiomRecognize {
   bool recognizeShiftUntilZero();
   std::optional<CRCInfo> looksLikeCRC(const SCEV *BECount);
   bool recognizeCRC(const SCEV *BECount);
+  void writeTableBasedCRCOneByte(CRCInfo &CRC);
 
   /// @}
 };
@@ -3296,6 +3298,96 @@ static bool symbolicallyExecute(BasicBlock *BB,
   return true;
 }
 
+void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
+  BasicBlock *ExitBB = CurLoop->getExitBlock();
+  IRBuilder<> Builder(ExitBB);
+  Builder.SetInsertPoint(ExitBB->getFirstNonPHI());
+  Type *CRCType = CRC.CRCInput->getType();
+  uint64_t CRCSize = CRCType->getScalarSizeInBits();
+
+  // Construct the CRC table
+  uint64_t CRCTable[256];
+  uint64_t Polynomial = CRC.Polynomial;
+  uint64_t SB = CRC.BitReversed ? 0x1 : (0x1 << (CRCSize - 1));
+  if (CRC.BitReversed)
+    Polynomial = reverseBits(Polynomial, CRCSize);
+  for (uint64_t Dividend = 0; Dividend < 256; Dividend++) {
+    uint64_t CurByte = Dividend;
+    if (!CRC.BitReversed)
+      CurByte <<= CRCSize - 8;
+    for (uint8_t Bit = 0; Bit < 8; Bit++) {
+      if ((CurByte & SB) != 0) {
+        CurByte = CRC.BitReversed ? CurByte >> 1 : CurByte << 1;
+        CurByte = CurByte ^ Polynomial;
+      } else {
+        CurByte = CRC.BitReversed ? CurByte >> 1 : CurByte << 1;
+      }
+    }
+    CRCTable[Dividend] = CurByte;
+  }
+  // To construct a global data array, we need the raw data in bytes.
+  // The calculated table array is an array of 64bit values because we can't
+  // dynamically type it, so we need to truncate the values to the crc size
+  // to avoid padded zeros. Do this by allocating a byte array (of slightly more
+  // than we need to account for overflow) and copying the 64bit values across
+  // aligned correctly
+  uint64_t CRCNumBytes = CRCSize / 8;
+  char *CRCTableData = (char *)malloc(CRCNumBytes * 260);
+  for (int I = 0; I < 256; I++) {
+    *((uint64_t *)(CRCTableData + I * CRCNumBytes)) = CRCTable[I];
+  }
+
+  // Construct and add the table as a global variable
+  ArrayType *TableType = ArrayType::get(CRCType, 256);
+  Constant *ConstantArr = ConstantDataArray::getRaw(
+      StringRef(CRCTableData, CRCNumBytes * 256), 256, CRCType);
+  std::stringstream TableNameSS;
+  TableNameSS << "crctable.i" << CRCSize << "." << CRC.Polynomial;
+  if (CRC.BitReversed)
+    TableNameSS << ".reversed";
+  GlobalVariable *CRCTableGlobal = new GlobalVariable(
+      TableType, true, GlobalVariable::LinkageTypes::PrivateLinkage,
+      ConstantArr, TableNameSS.str());
+  ExitBB->getModule()->insertGlobalVariable(CRCTableGlobal);
+  free(CRCTableData);
+
+  // Construct the IR to load from this table
+  Value *CRCOffset = CRC.CRCInput;
+  if (CRCSize > 8) {
+    // Get the next byte into position and truncate
+    if (!CRC.BitReversed)
+      CRCOffset = Builder.CreateLShr(CRCOffset, CRCSize - 8);
+    CRCOffset = Builder.CreateTrunc(CRCOffset, Builder.getInt8Ty());
+  }
+  if (CRC.DataInput) {
+    // Data size can be more than 8 due to extending
+    Value *Data = CRC.DataInput;
+    if (CRC.DataInput->getType()->getScalarSizeInBits() > 8) {
+      Data = Builder.CreateTrunc(Data, Builder.getInt8Ty());
+    }
+    // Xor the data, offset into the table and load
+    CRCOffset = Builder.CreateXor(CRCOffset, Data);
+  }
+
+  CRCOffset = Builder.CreateZExt(CRCOffset, Builder.getInt32Ty());
+  Value *Gep = Builder.CreateInBoundsGEP(CRCType, CRCTableGlobal, {CRCOffset});
+  Value *CRCRes = Builder.CreateLoad(CRCType, Gep);
+  if (CRCSize > 8) {
+    // Shift out SB used for division and Xor the rest of the crc back in
+    Value *RestOfCRC = CRC.CRCInput;
+    if (CRC.BitReversed)
+      RestOfCRC = Builder.CreateLShr(CRC.CRCInput, 8);
+    else
+      RestOfCRC = Builder.CreateShl(CRC.CRCInput, 8);
+    CRCRes = Builder.CreateXor(RestOfCRC, CRCRes);
+  }
+  for (PHINode &ExitPhi : CurLoop->getExitBlock()->phis()) {
+    if (ExitPhi.getNumIncomingValues() == 1 &&
+        ExitPhi.getIncomingValue(0) == CRC.CRCOutput)
+      ExitPhi.replaceAllUsesWith(CRCRes);
+  }
+}
+
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful
   // information for us to check
@@ -3501,7 +3593,9 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
 
   LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
 
-  return false;
+  writeTableBasedCRCOneByte(CRC);
+
+  return true;
 }
 
 std::optional<LoopIdiomRecognize::CRCInfo>

>From 6831e2cc11372666b36b62d1ed1efaa0ada37a13 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:23:57 +0000
Subject: [PATCH 07/12] [LoopIdiomRecognize] Add unit tests for CRC idiom
 recognizer

---
 llvm/test/Transforms/LoopIdiom/crc/crc.ll     | 195 ++++++++++++++++++
 llvm/test/Transforms/LoopIdiom/crc/not-crc.ll | 113 ++++++++++
 2 files changed, 308 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopIdiom/crc/crc.ll
 create mode 100644 llvm/test/Transforms/LoopIdiom/crc/not-crc.ll

diff --git a/llvm/test/Transforms/LoopIdiom/crc/crc.ll b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
new file mode 100644
index 00000000000000..17c1313a4c7d58
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
@@ -0,0 +1,195 @@
+; RUN: opt -passes=loop-idiom < %s -S -debug -recognize-crc 2>&1 | FileCheck %s
+
+; CRC 8 bit, data 8 bit
+; CHECK: GeneratorPolynomial: 29
+; CHECK: CRC Size: 8
+; CHECK: Reversed: 0
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define dso_local zeroext i8 @crc8_loop(ptr noundef %data, i32 noundef %length) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.cleanup7, %entry
+  %crc.0 = phi i8 [ 0, %entry ], [ %crc.1.lcssa, %for.cond.cleanup7 ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc20, %for.cond.cleanup7 ]
+  %cmp = icmp ult i32 %i.0, %length
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  %crc.0.lcssa = phi i8 [ %crc.0, %for.cond ]
+  ret i8 %crc.0.lcssa
+
+for.body:                                         ; preds = %for.cond
+  %add.ptr = getelementptr inbounds i8, ptr %data, i32 %i.0
+  %0 = load i8, ptr %add.ptr, align 1
+  %xor29 = xor i8 %0, %crc.0
+  br label %for.body8
+
+for.cond.cleanup7:                                ; preds = %for.body8
+  %crc.1.lcssa = phi i8 [ %crc.2, %for.body8 ]
+  %inc20 = add i32 %i.0, 1
+  br label %for.cond
+
+for.body8:                                        ; preds = %for.body, %for.body8
+  %i3.032 = phi i32 [ 0, %for.body ], [ %inc, %for.body8 ]
+  %crc.131 = phi i8 [ %xor29, %for.body ], [ %crc.2, %for.body8 ]
+  %shl = shl i8 %crc.131, 1
+  %xor14 = xor i8 %shl, 29
+  %cmp10.not30 = icmp slt i8 %crc.131, 0
+  %crc.2 = select i1 %cmp10.not30, i8 %xor14, i8 %shl
+  %inc = add nuw nsw i32 %i3.032, 1
+  %cmp5 = icmp ult i32 %inc, 8
+  br i1 %cmp5, label %for.body8, label %for.cond.cleanup7
+}
+
+; CRC16, 8 bit data
+; CHECK: Input CRC: i16 %crc
+; CHECK: Output CRC:   %crc.addr.2
+; CHECK: GeneratorPolynomial: 32773
+; CHECK: CRC Size: 16
+; CHECK: Reversed: 1
+; CHECK: Data Input: i8 %data
+; CHECK: Data Size: 8
+define i16 @crc16_reversed(i8 %data, i16 %crc) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+  %data.addr.034 = phi i8 [ %data, %entry ], [ %1, %for.body ]
+  %0 = trunc i16 %crc.addr.035 to i8
+  %and33 = xor i8 %0, %data.addr.034
+  %xor = and i8 %and33, 1
+  %1 = lshr i8 %data.addr.034, 1
+  %cmp10.not = icmp eq i8 %xor, 0
+  %2 = lshr i16 %crc.addr.035, 1
+  %3 = xor i16 %2, -24575
+  %crc.addr.2 = select i1 %cmp10.not, i16 %2, i16 %3
+  %inc = add nuw nsw i8 %i.036, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+; CRC16 xor outside loop
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define dso_local zeroext i16 @crc16_xor_outside(i16 %crc, i8 %data) {
+entry:
+  %conv2 = zext i8 %data to i16
+  %shl = shl nuw i16 %conv2, 8
+  %xor = xor i16 %shl, %crc
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.020 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.019 = phi i16 [ %xor, %entry ], [ %crc.addr.1, %for.body ]
+  %shl7 = shl i16 %crc.addr.019, 1
+  %xor8 = xor i16 %shl7, 4129
+  %tobool.not18 = icmp slt i16 %crc.addr.019, 0
+  %crc.addr.1 = select i1 %tobool.not18, i16 %xor8, i16 %shl7
+  %inc = add nuw nsw i32 %i.020, 1
+  %cmp = icmp ult i32 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.1, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+; CRC size 32 xor inside in a byte loop
+; CHECK: GeneratorPolynomial: 270598144
+; CHECK: CRC Size: 32
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define i16 @crc32_reversed(ptr %data_p, i16 %length) {
+entry:
+  %cmp = icmp eq i16 %length, 0
+  br i1 %cmp, label %cleanup, label %do.body.preheader
+
+do.body.preheader:                                ; preds = %entry
+  br label %do.body
+
+do.body:                                          ; preds = %do.body.preheader, %do.cond
+  %data_p.addr.0 = phi ptr [ %incdec.ptr, %do.cond ], [ %data_p, %do.body.preheader ]
+  %length.addr.0 = phi i16 [ %dec, %do.cond ], [ %length, %do.body.preheader ]
+  %crc.0 = phi i32 [ %crc.1.lcssa, %do.cond ], [ 65535, %do.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %data_p.addr.0, i64 1
+  %0 = load i8, ptr %data_p.addr.0, align 1
+  %conv3 = zext i8 %0 to i32
+  br label %for.body
+
+for.body:                                         ; preds = %do.body, %for.body
+  %crc.135 = phi i32 [ %crc.0, %do.body ], [ %crc.2, %for.body ]
+  %data.034 = phi i32 [ %conv3, %do.body ], [ %shr13, %for.body ]
+  %i.033 = phi i8 [ 0, %do.body ], [ %inc, %for.body ]
+  %and732 = xor i32 %crc.135, %data.034
+  %xor = and i32 %and732, 1
+  %tobool.not = icmp eq i32 %xor, 0
+  %shr = lshr i32 %crc.135, 1
+  %xor10 = xor i32 %shr, 33800
+  %crc.2 = select i1 %tobool.not, i32 %shr, i32 %xor10
+  %inc = add nuw nsw i8 %i.033, 1
+  %shr13 = lshr i32 %data.034, 1
+  %cmp5 = icmp ult i8 %inc, 8
+  br i1 %cmp5, label %for.body, label %do.cond
+
+do.cond:                                          ; preds = %for.body
+  %crc.1.lcssa = phi i32 [ %crc.2, %for.body ]
+  %dec = add i16 %length.addr.0, -1
+  %tobool14.not = icmp eq i16 %dec, 0
+  br i1 %tobool14.not, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.cond
+  %crc.1.lcssa.lcssa = phi i32 [ %crc.1.lcssa, %do.cond ]
+  %not15 = xor i32 %crc.1.lcssa.lcssa, -1
+  %shl = shl i32 %not15, 8
+  %shr16 = lshr i32 %not15, 8
+  %and17 = and i32 %shr16, 255
+  %or = add nuw nsw i32 %and17, %shl
+  %conv18 = trunc i32 %or to i16
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %do.end
+  %retval.0 = phi i16 [ %conv18, %do.end ], [ 0, %entry ]
+  ret i16 %retval.0
+}
+
+; CRC16 
+; CHECK: GeneratorPolynomial: 258
+; CHECK: CRC Size: 16
+; CHECK: Reversed: 0
+; CHECK: Data Size: 8
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define signext i16 @crc16(i16 %crcValue, i8 %newByte) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+  %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+  %and = lshr i16 %crcValue.addr.015, 8
+  %conv2 = zext i8 %newByte.addr.016 to i16
+  %shr14 = xor i16 %conv2, %and
+  %xor = and i16 %shr14, 128
+  %tobool.not = icmp eq i16 %xor, 0
+  %shl = shl i16 %crcValue.addr.015, 1
+  %xor4 = xor i16 %shl, 258
+  %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+  %shl7 = shl i8 %newByte.addr.016, 1
+  %inc = add nuw nsw i8 %i.017, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+  ret i16 %crcValue.addr.0.lcssa
+}
+
+; CHECK: @crctable.i16.32773.reversed = private constant [256 x i16] [i16 0, i16 -16191, i16 -15999, i16 320
+; CHECK: @crctable.i16.4129 = private constant [256 x i16] [i16 0, i16 4129, i16 8258, i16 12387, i16 16516
+; CHECK: @crctable.i32.270598144.reversed = private constant [256 x i32] [i32 0, i32 4489, i32 8978, i32 12955
+; CHECK: @crctable.i16.258 = private constant [256 x i16] [i16 0, i16 258, i16 516, i16 774, i16 1032
diff --git a/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll b/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
new file mode 100644
index 00000000000000..3144ffa6524351
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
@@ -0,0 +1,113 @@
+; RUN: opt -passes=loop-idiom < %s -S -debug -recognize-crc 2>&1 | FileCheck %s
+
+; crc16 incorrect xor inside loop
+; CHECK: loop-idiom CRCRegonize: Cannot verify check bit!
+; CHECK: crc[0]^data[0]
+; CHECK: crc[1]^1
+define i16 @crc16_incorrect_xor(i8 %data, i16 %crc) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+  %data.addr.034 = phi i8 [ %data, %entry ], [ %1, %for.body ]
+  %0 = trunc i16 %crc.addr.035 to i8
+  %and33 = xor i8 %0, 25
+  %xor = and i8 %and33, 1
+  %1 = lshr i8 %data.addr.034, 1
+  %cmp10.not = icmp eq i8 %xor, 0
+  %2 = lshr i16 %crc.addr.035, 1
+  %3 = xor i16 %2, -24575
+  %crc.addr.2 = select i1 %cmp10.not, i16 %2, i16 %3
+  %inc = add nuw nsw i8 %i.036, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+; Two byte at a time crc not supported
+; CHECK-NOT: loop-idiom CRCRegonize: This looks like crc!
+define i16 @crc16_reversed_data16(i16 %data, i16 %crc) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+  %data.addr.034 = phi i16 [ %data, %entry ], [ %0, %for.body ]
+  %and33 = xor i16 %crc.addr.035, %data.addr.034
+  %xor = and i16 %and33, 1
+  %0 = lshr i16 %data.addr.034, 1
+  %cmp10.not = icmp eq i16 %xor, 0
+  %1 = lshr i16 %crc.addr.035, 1
+  %2 = xor i16 %1, -24575
+  %crc.addr.2 = select i1 %cmp10.not, i16 %1, i16 %2
+  %inc = add nuw nsw i8 %i.036, 1
+  %cmp = icmp ult i8 %inc, 16
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+
+; Two shifts per iteration. Check that the ValueBits are correctly mismatched
+; CHECK-NOT: loop-idiom CRCRegonize: This looks like crc!
+define signext i16 @crc16_doubleshift(i16 %crcValue, i8 %newByte) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+  %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+  %and = lshr i16 %crcValue.addr.015, 8
+  %conv2 = zext i8 %newByte.addr.016 to i16
+  %shr14 = xor i16 %conv2, %and
+  %xor = and i16 %shr14, 128
+  %tobool.not = icmp eq i16 %xor, 0
+  %shlone = shl i16 %crcValue.addr.015, 1
+  %shl = lshr i16 %shlone, 1
+  %xor4 = xor i16 %shl, 258
+  %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+  %shl7 = shl i8 %newByte.addr.016, 1
+  %inc = add nuw nsw i8 %i.017, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+  ret i16 %crcValue.addr.0.lcssa
+}
+
+; CHECK: loop-idiom CRCRegonize: ICmp RHS is not checking [M/L]SB
+define signext i16 @crc16_not_check_sb(i16 %crcValue, i8 %newByte) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+  %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+  %and = lshr i16 %crcValue.addr.015, 8
+  %conv2 = zext i8 %newByte.addr.016 to i16
+  %shr14 = xor i16 %conv2, %and
+  %xor = and i16 %shr14, 128
+  %tobool.not = icmp eq i16 %xor, 2
+  %shl = shl i16 %crcValue.addr.015, 1
+  %xor4 = xor i16 %shl, 258
+  %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+  %shl7 = shl i8 %newByte.addr.016, 1
+  %inc = add nuw nsw i8 %i.017, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+  ret i16 %crcValue.addr.0.lcssa
+}

>From 4eb14971a98fac2e8c5547ba423a028da21319b1 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Thu, 1 Feb 2024 11:51:20 +0000
Subject: [PATCH 08/12] Clarify some comments

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index d9c18cd9696f04..a2d898ae74ff60 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3413,9 +3413,12 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   }
 
   // Symbolically execute one iteration of the loop to populate a map of
-  // Value's to their ValueBits, aka a representation of their bits in terms of
-  // 1's, 0's and references to other values' bits. If these match pre-computed
-  // crc values, then we can say it's doing crc.
+  // Value's to their ValueBits, i.e. a representation of their bits in terms of
+  // 1's, 0's and references to other values' bits. This tracks how the bits
+  // move through an iteration of the loop. If the loop output's ValueBits
+  // match pre-computed values, then we can say it's doing crc. If there are
+  // any unexpected loop variant operations happening, e.g. additional select
+  // logic or shifts, then this will be captured in the ValueBits.
   std::map<Value *, ValueBits *> ValueMap;
 
   if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
@@ -3461,10 +3464,12 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   ValueBits *ICmpOp0Bits = Result->second;
 
   // Now match the following cases
-  // (LSB): icmp [ne/eq] %mcrc, [1/0], where mcrc has LSB masked out
-  // (MSB): icmp [ne/eq] %mcrc, [1 << BitSize], where mcrc has MSB masked out
-  // (MSB): icmp [sgt/sge] %crc, [1/0]
-  // (MSB): icmp [slt/sle] %crc, [0/-1]
+  // (LSB): (crc & 1)
+  // (MSB): (crc & (1 << n))
+  // (MSB): crc > 0
+  // (MSB): crc >= 1
+  // (MSB): crc < 0
+  // (MSB): crc <= -1
   // And decide whether the check is checking for existence of 1 or 0
   bool checkZero = false;
   ValueBits::ValueBit *CheckBit = nullptr;

>From 7438109ecb17246c6d43da52202358d14d5a990c Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 23 Feb 2024 16:41:21 +0000
Subject: [PATCH 09/12] Address review comments unrelated to memory management

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 66 ++++++++++---------
 llvm/test/Transforms/LoopIdiom/crc/crc.ll     |  8 +--
 2 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a2d898ae74ff60..76280e2efc1c6f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -137,8 +137,11 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
              "with -Os/-Oz"),
     cl::init(true), cl::Hidden);
 
-static cl::opt<bool> CRCRecognize("recognize-crc", cl::desc("CRC RECOGNIZE"),
-                                  cl::init(false), cl::Hidden);
+static cl::opt<bool>
+    CRCRecognize("recognize-crc",
+                 cl::desc("Recognize loop-based CRC implementations and "
+                          "replaces them with a lookup table."),
+                 cl::init(false), cl::Hidden);
 
 namespace {
 
@@ -3193,7 +3196,7 @@ static bool symbolicallyExecute(BasicBlock *BB,
   auto getConstantOperand = [](Instruction *I, uint8_t Operand) {
     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
     if (!CI) {
-      LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Do not know how to"
                         << " handle this operation with non-constant operand "
                         << Operand << ":\n"
                         << *I << "\n");
@@ -3227,7 +3230,7 @@ static bool symbolicallyExecute(BasicBlock *BB,
         if (Incoming != BB) {
           if (IncomingBlock) {
             LLVM_DEBUG(dbgs()
-                       << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                       << DEBUG_TYPE " CRCRecognize: Do not know how to"
                        << " handle loop with multiple entries" << I << "\n");
             return false;
           }
@@ -3279,7 +3282,7 @@ static bool symbolicallyExecute(BasicBlock *BB,
       SelectInst *Select = cast<SelectInst>(&I);
       ICmpInst *Cond = dyn_cast<ICmpInst>(Select->getCondition());
       if (!Cond) {
-        LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+        LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Do not know how to"
                           << " handle SelectInst with non-icmp condition: " << I
                           << "\n");
         return false;
@@ -3332,7 +3335,7 @@ void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
   // than we need to account for overflow) and copying the 64bit values across
   // aligned correctly
   uint64_t CRCNumBytes = CRCSize / 8;
-  char *CRCTableData = (char *)malloc(CRCNumBytes * 260);
+  char *CRCTableData = new char[CRCNumBytes * 260];
   for (int I = 0; I < 256; I++) {
     *((uint64_t *)(CRCTableData + I * CRCNumBytes)) = CRCTable[I];
   }
@@ -3349,7 +3352,7 @@ void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
       TableType, true, GlobalVariable::LinkageTypes::PrivateLinkage,
       ConstantArr, TableNameSS.str());
   ExitBB->getModule()->insertGlobalVariable(CRCTableGlobal);
-  free(CRCTableData);
+  delete CRCTableData;
 
   // Construct the IR to load from this table
   Value *CRCOffset = CRC.CRCInput;
@@ -3369,7 +3372,8 @@ void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
     CRCOffset = Builder.CreateXor(CRCOffset, Data);
   }
 
-  CRCOffset = Builder.CreateZExt(CRCOffset, Builder.getInt32Ty());
+  CRCOffset =
+      Builder.CreateZExt(CRCOffset, DL->getIndexType(ExitBB->getContext(), 0));
   Value *Gep = Builder.CreateInBoundsGEP(CRCType, CRCTableGlobal, {CRCOffset});
   Value *CRCRes = Builder.CreateLoad(CRCType, Gep);
   if (CRCSize > 8) {
@@ -3397,7 +3401,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   CRCInfo CRC = *MaybeCRC;
 
   uint64_t CRCSize = CRC.CRCInput->getType()->getScalarSizeInBits();
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Found potential CRCLoop "
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Found potential CRCLoop "
                     << *CurLoop << "\n"
                     << "Input CRC: " << *CRC.CRCInput << "\n"
                     << "Output CRC: " << *CRC.CRCOutput << "\n"
@@ -3420,26 +3424,26 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // any unexpected loop variant operations happening, e.g. additional select
   // logic or shifts, then this will be captured in the ValueBits.
   std::map<Value *, ValueBits *> ValueMap;
-
+    
   if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
     return false;
 
   auto Result = ValueMap.find(CRC.CRCOutput);
   if (Result == ValueMap.end()) {
-    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Did not find CRC output"
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Did not find CRC output"
                       << " after symbolic execution\n");
     return false;
   }
 
   ValueBits *CRCOutBits = Result->second;
   LLVM_DEBUG(dbgs() << DEBUG_TYPE
-                    << " CRCRegonize: ValueBits for output crc value:\n"
+                    << " CRCRecognize: ValueBits for output crc value:\n"
                     << *CRCOutBits);
 
   // Check this value is predicated
   if (!CRCOutBits->isPredicated()) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE
-                      << " CRCRegonize: Output CRC ValueBits is not"
+                      << " CRCRecognize: Output CRC ValueBits is not"
                       << " predicated.\n");
     return false;
   }
@@ -3449,7 +3453,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // whether this is bit reversed CRC
   ICmpInst *ICmp = CRCOutBitsPred->getPredicate();
   CmpInst::Predicate Pred = ICmp->getPredicate();
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize checking to see if " << *ICmp
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize checking to see if " << *ICmp
                     << " is checking the "
                     << (CRC.BitReversed ? "LSB\n" : "MSB\n"));
 
@@ -3458,7 +3462,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   Result = ValueMap.find(ICmp->getOperand(0));
   if (!RHS || (Result == ValueMap.end())) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE
-                      << " CRCRegonize: Cannot determine ICmp operands\n");
+                      << " CRCRecognize: Cannot determine ICmp operands\n");
     return false;
   }
   ValueBits *ICmpOp0Bits = Result->second;
@@ -3471,7 +3475,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // (MSB): crc < 0
   // (MSB): crc <= -1
   // And decide whether the check is checking for existence of 1 or 0
-  bool checkZero = false;
+  bool CheckZero = false;
   ValueBits::ValueBit *CheckBit = nullptr;
   switch (Pred) {
   case CmpInst::ICMP_NE:
@@ -3483,7 +3487,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
     if (!(CRC.BitReversed && RHSNum == 1) &&
         !(!CRC.BitReversed && RHSNum == MSBNum) && RHSNum != 0) {
       LLVM_DEBUG(dbgs() << DEBUG_TYPE
-                        << " CRCRegonize: ICmp RHS is not checking [M/L]SB\n");
+                        << " CRCRecognize: ICmp RHS is not checking [M/L]SB\n");
       return false;
     }
     // Now to check if we already know all the other bits of the RHS are zero.
@@ -3504,17 +3508,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
     if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
       LLVM_DEBUG(
           dbgs() << DEBUG_TYPE
-                 << " CRCRegonize: Cannot determine ICmp checks [M/L]SB\n");
+                 << " CRCRecognize: Cannot determine ICmp checks [M/L]SB\n");
       return false;
     }
-    checkZero = RHSNum == 0;
+    CheckZero = RHSNum == 0;
     break;
   }
   case CmpInst::ICMP_SGT:
   case CmpInst::ICMP_SGE:
   case CmpInst::ICMP_ULT:
   case CmpInst::ICMP_ULE:
-    checkZero = true;
+    CheckZero = true;
     [[fallthrough]];
   case CmpInst::ICMP_SLT:
   case CmpInst::ICMP_SLE: {
@@ -3526,7 +3530,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
         ((Pred == CmpInst::ICMP_ULT) && RHSNum != (1 << (CRC.Width - 1))) ||
         ((Pred == CmpInst::ICMP_ULE) && RHSNum != (1 << (CRC.Width - 1)) - 1)) {
       LLVM_DEBUG(dbgs() << DEBUG_TYPE
-                        << " CRCRegonize: ICmp RHS is not checking MSB\n");
+                        << " CRCRecognize: ICmp RHS is not checking MSB\n");
       return false;
     }
     CheckBit = ICmpOp0Bits->getBit(CRCSize - 1);
@@ -3544,17 +3548,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   if (CRC.DataInput) {
     uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
     uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
-    ValueBits::ValueBit *DataInputRefBit =
-        ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+   ValueBits::ValueBit *DataInputRefBit =
+       ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
     RefCheckBit =
-        ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+       ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
   } else {
     RefCheckBit = CRCInputRefBit;
   }
 
   if (!RefCheckBit->equals(CheckBit)) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE
-                      << " CRCRegonize: Cannot verify check bit!\n"
+                      << " CRCRecognize: Cannot verify check bit!\n"
                       << *RefCheckBit << "\n"
                       << *CheckBit << "\n");
     return false;
@@ -3562,7 +3566,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
 
   ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
   ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
-  if (checkZero)
+  if (CheckZero)
     std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
 
   // Now construct ValueBits that would be the result of crc for one iteration.
@@ -3586,17 +3590,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
 
   if (!IfZero->equals(CRCOutBitsIfZero)) {
-    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
                       << *IfZero << *CRCOutBitsPred->getIfFalse());
     return false;
   }
   if (!IfOne->equals(CRCOutBitsIfOne)) {
-    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
                       << *IfOne << *CRCOutBitsPred->getIfTrue());
     return false;
   }
 
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: This looks like crc!\n");
 
   writeTableBasedCRCOneByte(CRC);
 
@@ -3620,7 +3624,7 @@ LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
   // unlikely to be CRC. To reduce complexity, only consider single-block loops
   // for CRC recognition
   if (CurLoop->getBlocks().size() > 1) {
-    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Loops with more than one"
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Loops with more than one"
                       << "block are unsupported\n");
     return std::nullopt;
   }
@@ -3737,7 +3741,7 @@ LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
   }
 
   if (!(CRCShift && GeneratorPolynomial && CRCInput)) {
-    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Does not look like CRC");
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRecognize: Does not look like CRC");
     return std::nullopt;
   }
 
diff --git a/llvm/test/Transforms/LoopIdiom/crc/crc.ll b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
index 17c1313a4c7d58..5a1c6afcb57412 100644
--- a/llvm/test/Transforms/LoopIdiom/crc/crc.ll
+++ b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
@@ -4,7 +4,7 @@
 ; CHECK: GeneratorPolynomial: 29
 ; CHECK: CRC Size: 8
 ; CHECK: Reversed: 0
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
 define dso_local zeroext i8 @crc8_loop(ptr noundef %data, i32 noundef %length) {
 entry:
   br label %for.cond
@@ -76,7 +76,7 @@ for.end:                                          ; preds = %for.body
 }
 
 ; CRC16 xor outside loop
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
 define dso_local zeroext i16 @crc16_xor_outside(i16 %crc, i8 %data) {
 entry:
   %conv2 = zext i8 %data to i16
@@ -103,7 +103,7 @@ for.end:                                          ; preds = %for.body
 ; CRC size 32 xor inside in a byte loop
 ; CHECK: GeneratorPolynomial: 270598144
 ; CHECK: CRC Size: 32
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
 define i16 @crc32_reversed(ptr %data_p, i16 %length) {
 entry:
   %cmp = icmp eq i16 %length, 0
@@ -162,7 +162,7 @@ cleanup:                                          ; preds = %entry, %do.end
 ; CHECK: CRC Size: 16
 ; CHECK: Reversed: 0
 ; CHECK: Data Size: 8
-; CHECK: loop-idiom CRCRegonize: This looks like crc!
+; CHECK: loop-idiom CRCRecognize: This looks like crc!
 define signext i16 @crc16(i16 %crcValue, i8 %newByte) {
 entry:
   br label %for.body

>From 969c48dcb359e3d33f3a909497e8a1da72906faf Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 23 Feb 2024 17:30:01 +0000
Subject: [PATCH 10/12] Fix memory issues

- Use shared pointers for factory methods

I think the use of shared pointers in `symbolicallyExecute` is the
correct decision because the function fills a map. I tried for a while
to make this a map of objects, but due to [object
slicing](https://en.wikipedia.org/wiki/Object_slicing) it would not be
possible to store a PredicatedValueBits in this map. (Storing objects
would require PredicatedValueBits to be folded into the ValueBits class,
but then the internal methods get messy; virtual functions were a clean
answer to this.) Additionally, using `unique_ptr` proves difficult
because the pointers have to be stored in a map, and unique pointers
cannot be copied. As `symbolicallyExecute` is the primary user of the
factory methods, it made sense for the factory methods to return a
shared pointer directly, instead of returning a raw pointer or object
and then wrapping it in a shared pointer within `symbolicallyExecute`.

- Have XOR bits own their LHS/RHS

I don't think having them as member objects instead of pointers makes
much sense: they are not needed for most ValueBit objects, yet they
would have to be initialized for every ValueBit instantiation.

- Use object values instead of pointers for bit representation

IMO this is the cleanest way; ValueBit objects are very lightweight anyway.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 320 ++++++++++--------
 1 file changed, 180 insertions(+), 140 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 76280e2efc1c6f..050efcf66d7d31 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -91,6 +91,7 @@
 #include <cassert>
 #include <cstdint>
 #include <map>
+#include <memory>
 #include <sstream>
 #include <utility>
 #include <vector>
@@ -2928,37 +2929,71 @@ class ValueBits {
   private:
     BitType _Type;
     std::pair<Value *, uint64_t> _BitRef;
-    ValueBit *_LHS;
-    ValueBit *_RHS;
+    // Pointers to LHS and RHS of an XOR operation. These pointers are owned by
+    // the ValueBit object.
+    ValueBit *_LHS = nullptr;
+    ValueBit *_RHS = nullptr;
 
+  public:
     ValueBit(BitType Type) : _Type(Type) {}
     ValueBit(BitType Type, std::pair<Value *, uint64_t> BitRef)
         : _Type(Type), _BitRef(BitRef) {}
-    ValueBit(BitType Type, ValueBit *LHS, ValueBit *RHS)
-        : _Type(Type), _LHS(LHS), _RHS(RHS) {}
+    ValueBit(BitType Type, ValueBit &LHS, ValueBit &RHS)
+        : _Type(Type), _LHS(new ValueBit(LHS)), _RHS(new ValueBit(RHS)) {
+      assert(_Type == BitType::XOR);
+    }
+    ValueBit() = delete;
+    // Define Copy and Assignment constructor to create copies of the LHS and
+    // RHS if the bit type is XOR. This is done to ensure the pointers will be
+    // owned by the ValueBit object and avoid double free in the destructor.
+    ValueBit(const ValueBit &VB) {
+      _Type = VB._Type;
+      if (_Type == BitType::REF)
+        _BitRef = VB._BitRef;
+      else if (_Type == BitType::XOR) {
+        _LHS = new ValueBit(*VB._LHS);
+        _RHS = new ValueBit(*VB._RHS);
+      }
+    }
+    ValueBit& operator=(const ValueBit &VB) {
+      _Type = VB._Type;
+      if (_Type == BitType::REF)
+        _BitRef = VB._BitRef;
+      else if (_Type == BitType::XOR) {
+        _LHS = new ValueBit(*VB._LHS);
+        _RHS = new ValueBit(*VB._RHS);
+      }
+      return *this;
+    }
+    ~ValueBit() {
+      if (_LHS)
+        delete _LHS;
+      if (_RHS)
+        delete _RHS;
+    }
 
   public:
-    static ValueBit *CreateOneBit() { return new ValueBit(BitType::ONE); }
-    static ValueBit *CreateZeroBit() { return new ValueBit(BitType::ZERO); }
-    static ValueBit *CreateRefBit(Value *Ref, uint64_t Offset) {
-      return new ValueBit(BitType::REF, std::make_pair(Ref, Offset));
+    static ValueBit CreateOneBit() { return ValueBit(BitType::ONE); }
+    static ValueBit CreateZeroBit() { return ValueBit(BitType::ZERO); }
+    static ValueBit CreateRefBit(Value *Ref, uint64_t Offset) {
+      return ValueBit(BitType::REF, std::make_pair(Ref, Offset));
     }
-    static ValueBit *CreateXORBit(ValueBit *LHS, ValueBit *RHS) {
-      return new ValueBit(BitType::XOR, LHS, RHS);
+    static ValueBit CreateXORBit(ValueBit &LHS, ValueBit &RHS) {
+      return ValueBit(BitType::XOR, LHS, RHS);
     }
     inline BitType getType() { return _Type; }
-    bool equals(ValueBit *RHS) {
-      if (_Type != RHS->getType())
+    bool equals(ValueBit RHS) {
+      if (_Type != RHS.getType())
         return false;
       switch (_Type) {
       case BitType::ONE:
       case BitType::ZERO:
         return true;
       case BitType::REF:
-        return _BitRef == RHS->_BitRef;
+        return _BitRef == RHS._BitRef;
       case BitType::XOR:
-        return (_LHS->equals(RHS->_LHS) && _RHS->equals(RHS->_RHS)) ||
-               (_LHS->equals(RHS->_RHS) && _RHS->equals(RHS->_LHS));
+        return (_LHS->equals(*RHS._LHS) && _RHS->equals(*RHS._RHS)) ||
+               (_LHS->equals(*RHS._RHS) && _RHS->equals(*RHS._LHS));
       }
       return false;
     }
@@ -2983,10 +3018,11 @@ class ValueBits {
       }
     }
   };
+  using PValueBits = std::shared_ptr<ValueBits>;
 
 private:
   uint64_t Size;
-  std::vector<ValueBit *> Bits;
+  std::vector<ValueBit> Bits;
 
   virtual void _Shl(uint64_t N) {
     for (; N > 0; N--) {
@@ -3000,28 +3036,28 @@ class ValueBits {
       Bits.erase(Bits.begin());
     }
   }
-  virtual void _Xor(ValueBits *RHS) {
-    assert(Size == RHS->getSize());
+  virtual void _Xor(const ValueBits &RHS) {
+    assert(Size == RHS.getSize());
     for (unsigned I = 0; I < Size; I++) {
       auto It = Bits.begin() + I;
-      ValueBit *RHSBit = RHS->getBit(I);
-      if (RHSBit->getType() == ValueBit::BitType::ONE) {
+      ValueBit RHSBit = RHS.getBit(I);
+      if (RHSBit.getType() == ValueBit::BitType::ONE) {
+        ValueBit ItVB = *It;
         Bits.erase(It);
-        if ((*It)->getType() == ValueBit::BitType::ZERO) {
+        if (ItVB.getType() == ValueBit::BitType::ZERO) {
           Bits.insert(It, ValueBit::CreateOneBit());
-        } else if ((*It)->getType() == ValueBit::BitType::ONE) {
+        } else if (ItVB.getType() == ValueBit::BitType::ONE) {
           Bits.insert(It, ValueBit::CreateZeroBit());
         } else {
-          ValueBit *One = ValueBit::CreateOneBit();
-          Bits.insert(It, ValueBit::CreateXORBit(*It, One));
+          ValueBit One = ValueBit::CreateOneBit();
+          Bits.insert(It, ValueBit::CreateXORBit(ItVB, One));
         }
-      } else if (RHSBit->getType() != ValueBit::BitType::ZERO) {
-        if ((*It)->getType() == ValueBit::BitType::ZERO) {
+      } else if (RHSBit.getType() != ValueBit::BitType::ZERO) {
+        if ((*It).getType() == ValueBit::BitType::ZERO) {
           Bits.erase(It);
-          ValueBit *BitRef = new ValueBit(*RHSBit);
-          Bits.insert(It, BitRef);
+          Bits.insert(It, RHSBit);
         } else {
-          ValueBit *ItVB = *It;
+          ValueBit ItVB = *It;
           Bits.erase(It);
           Bits.insert(It, ValueBit::CreateXORBit(ItVB, RHSBit));
         }
@@ -3067,40 +3103,40 @@ class ValueBits {
       InitialVal >>= 1;
     }
   }
-  uint64_t getSize() { return Size; }
-  ValueBit *getBit(unsigned i) { return Bits[i]; }
+  uint64_t getSize() const { return Size; }
+  ValueBit getBit(unsigned i) const { return Bits[i]; }
 
-  virtual ValueBits *copyBits() { return new ValueBits(*this); }
+  virtual ValueBits copyBits() { return ValueBits(*this); }
 
-  static ValueBits *Shl(ValueBits *LHS, uint64_t N) {
-    ValueBits *Shifted = LHS->copyBits();
-    Shifted->_Shl(N);
-    return Shifted;
+  static PValueBits Shl(const ValueBits &LHS, uint64_t N) {
+    PValueBits VB = std::make_shared<ValueBits>(LHS);
+    VB->_Shl(N);
+    return VB;
   }
-  static ValueBits *LShr(ValueBits *LHS, uint64_t N) {
-    ValueBits *Shifted = LHS->copyBits();
-    Shifted->_LShr(N);
-    return Shifted;
+  static PValueBits LShr(const ValueBits &LHS, uint64_t N) {
+    PValueBits VB = std::make_shared<ValueBits>(LHS);
+    VB->_LShr(N);
+    return VB;
   }
-  static ValueBits *Xor(ValueBits *LHS, ValueBits *RHS) {
-    ValueBits *Xord = LHS->copyBits();
-    Xord->_Xor(RHS);
-    return Xord;
+  static PValueBits Xor(const ValueBits &LHS, const ValueBits &RHS) {
+    PValueBits VB = std::make_shared<ValueBits>(LHS);
+    VB->_Xor(RHS);
+    return VB;
   }
-  static ValueBits *ZExt(ValueBits *LHS, uint64_t ToSize) {
-    ValueBits *Zexted = LHS->copyBits();
-    Zexted->_ZExt(ToSize);
-    return Zexted;
+  static PValueBits ZExt(const ValueBits &LHS, uint64_t ToSize) {
+    PValueBits VB = std::make_shared<ValueBits>(LHS);
+    VB->_ZExt(ToSize);
+    return VB;
   }
-  static ValueBits *Trunc(ValueBits *LHS, uint64_t N) {
-    ValueBits *Trunced = LHS->copyBits();
-    Trunced->_Trunc(N);
-    return Trunced;
+  static PValueBits Trunc(const ValueBits &LHS, uint64_t N) {
+    PValueBits VB = std::make_shared<ValueBits>(LHS);
+    VB->_Trunc(N);
+    return VB;
   }
-  static ValueBits *And(ValueBits *LHS, uint64_t RHS) {
-    ValueBits *Anded = LHS->copyBits();
-    Anded->_And(RHS);
-    return Anded;
+  static PValueBits And(const ValueBits &LHS, uint64_t RHS) {
+    PValueBits VB = std::make_shared<ValueBits>(LHS);
+    VB->_And(RHS);
+    return VB;
   }
 
   virtual bool isPredicated() { return false; }
@@ -3110,7 +3146,7 @@ class ValueBits {
       return false;
 
     for (unsigned I = 0; I < Size; I++)
-      if (!getBit(I)->equals(RHS->getBit(I)))
+      if (!getBit(I).equals(RHS->getBit(I)))
         return false;
 
     return true;
@@ -3119,10 +3155,10 @@ class ValueBits {
   virtual void print(raw_ostream &OS) {
     assert(Size != 0);
     OS << "[";
-    Bits[Size - 1]->print(OS);
+    Bits[Size - 1].print(OS);
     for (int i = Size - 2; i >= 0; i--) {
       OS << " | ";
-      Bits[i]->print(OS);
+      Bits[i].print(OS);
     }
     OS << "]\n";
   }
@@ -3142,43 +3178,43 @@ class PredicatedValueBits : public ValueBits {
   // would depend on an icmp.
 private:
   ICmpInst *_Predicate;
-  ValueBits *_IfTrue;
-  ValueBits *_IfFalse;
+  PValueBits _IfTrue;
+  PValueBits _IfFalse;
 
   void _Shl(uint64_t N) override {
-    _IfTrue = ValueBits::Shl(_IfTrue, N);
-    _IfFalse = ValueBits::Shl(_IfFalse, N);
+    _IfTrue = ValueBits::Shl(*_IfTrue, N);
+    _IfFalse = ValueBits::Shl(*_IfFalse, N);
   }
   void _LShr(uint64_t N) override {
-    _IfTrue = ValueBits::LShr(_IfTrue, N);
-    _IfFalse = ValueBits::LShr(_IfFalse, N);
+    _IfTrue = ValueBits::LShr(*_IfTrue, N);
+    _IfFalse = ValueBits::LShr(*_IfFalse, N);
   }
   void _ZExt(uint64_t N) override {
-    _IfTrue = ValueBits::ZExt(_IfTrue, N);
-    _IfFalse = ValueBits::ZExt(_IfFalse, N);
+    _IfTrue = ValueBits::ZExt(*_IfTrue, N);
+    _IfFalse = ValueBits::ZExt(*_IfFalse, N);
   }
   void _And(uint64_t N) override {
-    _IfTrue = ValueBits::And(_IfTrue, N);
-    _IfFalse = ValueBits::And(_IfFalse, N);
+    _IfTrue = ValueBits::And(*_IfTrue, N);
+    _IfFalse = ValueBits::And(*_IfFalse, N);
   }
-  void _Xor(ValueBits *RHS) override {
-    _IfTrue = ValueBits::Xor(_IfTrue, RHS);
-    _IfFalse = ValueBits::Xor(_IfFalse, RHS);
+  void _Xor(const ValueBits &RHS) override {
+    _IfTrue = ValueBits::Xor(*_IfTrue, RHS);
+    _IfFalse = ValueBits::Xor(*_IfFalse, RHS);
   }
   void _Trunc(uint64_t N) override {
-    _IfTrue = ValueBits::Trunc(_IfTrue, N);
-    _IfFalse = ValueBits::Trunc(_IfFalse, N);
+    _IfTrue = ValueBits::Trunc(*_IfTrue, N);
+    _IfFalse = ValueBits::Trunc(*_IfFalse, N);
   }
 
 public:
-  PredicatedValueBits(ICmpInst *Predicate, ValueBits *IfTrue,
-                      ValueBits *IfFalse)
+  PredicatedValueBits(ICmpInst *Predicate, PValueBits IfTrue,
+                      PValueBits IfFalse)
       : _Predicate(Predicate), _IfTrue(IfTrue), _IfFalse(IfFalse) {}
 
-  ValueBits *copyBits() override { return new PredicatedValueBits(*this); }
+  ValueBits copyBits() override { return PredicatedValueBits(*this); }
   bool isPredicated() override { return true; }
-  ValueBits *getIfTrue() { return _IfTrue; }
-  ValueBits *getIfFalse() { return _IfFalse; }
+  PValueBits getIfTrue() { return _IfTrue; }
+  PValueBits getIfFalse() { return _IfFalse; }
   ICmpInst *getPredicate() { return _Predicate; }
 
   virtual void print(raw_ostream &OS) override {
@@ -3190,8 +3226,9 @@ class PredicatedValueBits : public ValueBits {
 
 // Execute the instructions in a basic block whilst mapping out Values to
 // ValueBits
-static bool symbolicallyExecute(BasicBlock *BB,
-                                std::map<Value *, ValueBits *> &ValueMap) {
+static bool
+symbolicallyExecute(BasicBlock *BB,
+                    std::map<Value *, std::shared_ptr<ValueBits>> &ValueMap) {
 
   auto getConstantOperand = [](Instruction *I, uint8_t Operand) {
     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
@@ -3206,18 +3243,15 @@ static bool symbolicallyExecute(BasicBlock *BB,
 
   auto getOrCreateValueBits = [&ValueMap](Value *Val) {
     auto Result = ValueMap.find(Val);
-    ValueBits *LHSBits = nullptr;
-    if (Result == ValueMap.end()) {
-      ConstantInt *CI = dyn_cast<ConstantInt>(Val);
-      if (CI) {
-        LHSBits = new ValueBits(CI->getSExtValue(),
-                                Val->getType()->getScalarSizeInBits());
-      } else {
-        LHSBits = new ValueBits(Val, Val->getType()->getScalarSizeInBits());
-      }
-    } else
-      LHSBits = Result->second;
-    return LHSBits;
+    if (Result != ValueMap.end())
+      return Result->second;
+    ConstantInt *CI = dyn_cast<ConstantInt>(Val);
+    if (CI) {
+      return std::make_shared<ValueBits>(CI->getSExtValue(),
+                                         Val->getType()->getScalarSizeInBits());
+    }
+    return std::make_shared<ValueBits>(Val,
+                                       Val->getType()->getScalarSizeInBits());
   };
 
   for (Instruction &I : *BB) {
@@ -3238,45 +3272,42 @@ static bool symbolicallyExecute(BasicBlock *BB,
         }
       }
       assert(IncomingBlock);
-      ValueMap[&I] =
-          getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock));
+      ValueMap.insert({&I,
+          getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock))});
     } break;
     case Instruction::Shl: {
       ConstantInt *CI = getConstantOperand(&I, 1);
       if (!CI)
         return false;
-      Value *LHSVal = I.getOperand(0);
-      ValueBits *LHSBits = getOrCreateValueBits(LHSVal);
-      ValueMap[&I] = ValueBits::Shl(LHSBits, CI->getSExtValue());
+      auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap.insert({&I, ValueBits::Shl(*LHSBits, CI->getSExtValue())});
     } break;
     case Instruction::LShr: {
       ConstantInt *CI = getConstantOperand(&I, 1);
       if (!CI)
         return false;
-      Value *LHSVal = I.getOperand(0);
-      ValueBits *LHSBits = getOrCreateValueBits(LHSVal);
-      ValueMap[&I] = ValueBits::LShr(LHSBits, CI->getSExtValue());
+      auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap.insert({&I, ValueBits::LShr(*LHSBits, CI->getSExtValue())});
     } break;
     case Instruction::And: {
       ConstantInt *CI = getConstantOperand(&I, 1);
       if (!CI)
         return false;
-      Value *LHSVal = I.getOperand(0);
-      ValueBits *LHSBits = getOrCreateValueBits(LHSVal);
-      ValueMap[&I] = ValueBits::And(LHSBits, CI->getSExtValue());
+      auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap.insert({&I, ValueBits::And(*LHSBits, CI->getSExtValue())});
     } break;
     case Instruction::Xor: {
-      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
-      ValueBits *RHSBits = getOrCreateValueBits(I.getOperand(1));
-      ValueMap[&I] = ValueBits::Xor(LHSBits, RHSBits);
+      auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+      auto RHSBits = getOrCreateValueBits(I.getOperand(1));
+      ValueMap.insert({&I, ValueBits::Xor(*LHSBits, *RHSBits)});
     } break;
     case Instruction::ZExt: {
-      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
-      ValueMap[&I] = ValueBits::ZExt(LHSBits, BitSize);
+      auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap.insert({&I, ValueBits::ZExt(*LHSBits, BitSize)});
     } break;
     case Instruction::Trunc: {
-      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
-      ValueMap[&I] = ValueBits::Trunc(LHSBits, BitSize);
+      auto LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap.insert({&I, ValueBits::Trunc(*LHSBits, BitSize)});
     } break;
     case Instruction::Select: {
       SelectInst *Select = cast<SelectInst>(&I);
@@ -3287,9 +3318,10 @@ static bool symbolicallyExecute(BasicBlock *BB,
                           << "\n");
         return false;
       }
-      ValueBits *IfTrue = getOrCreateValueBits(Select->getTrueValue());
-      ValueBits *IfFalse = getOrCreateValueBits(Select->getFalseValue());
-      ValueMap[&I] = new PredicatedValueBits(Cond, IfTrue, IfFalse);
+      auto IfTrue = getOrCreateValueBits(Select->getTrueValue());
+      auto IfFalse = getOrCreateValueBits(Select->getFalseValue());
+      ValueMap.insert({&I,
+          std::make_shared<PredicatedValueBits>(Cond, IfTrue, IfFalse)});
     } break;
     default:
       // If this instruction is not recognized, then just continue. This is
@@ -3423,7 +3455,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // match pre-computed values, then we can say it's doing crc. If there are
   // any unexpected loop variant operations happening, e.g. additional select
   // logic or shifts, then this will be captured in the ValueBits.
-  std::map<Value *, ValueBits *> ValueMap;
+  std::map<Value *, std::shared_ptr<ValueBits>> ValueMap;
     
   if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
     return false;
@@ -3435,7 +3467,8 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
     return false;
   }
 
-  ValueBits *CRCOutBits = Result->second;
+  using PValueBits = std::shared_ptr<ValueBits>;
+  PValueBits CRCOutBits = Result->second;
   LLVM_DEBUG(dbgs() << DEBUG_TYPE
                     << " CRCRecognize: ValueBits for output crc value:\n"
                     << *CRCOutBits);
@@ -3447,7 +3480,8 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
                       << " predicated.\n");
     return false;
   }
-  PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
+  std::shared_ptr<PredicatedValueBits> CRCOutBitsPred =
+      std::static_pointer_cast<PredicatedValueBits>(CRCOutBits);
 
   // Need to check if the predicate is checking the MSB/LSB depending on
   // whether this is bit reversed CRC
@@ -3465,7 +3499,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
                       << " CRCRecognize: Cannot determine ICmp operands\n");
     return false;
   }
-  ValueBits *ICmpOp0Bits = Result->second;
+  PValueBits ICmpOp0Bits = Result->second;
 
   // Now match the following cases
   // (LSB): (crc & 1)
@@ -3476,7 +3510,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // (MSB): crc <= -1
   // And decide whether the check is checking for existence of 1 or 0
   bool CheckZero = false;
-  ValueBits::ValueBit *CheckBit = nullptr;
+  std::optional<ValueBits::ValueBit> CheckBit;
   switch (Pred) {
   case CmpInst::ICMP_NE:
   case CmpInst::ICMP_EQ: {
@@ -3492,17 +3526,17 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
     }
     // Now to check if we already know all the other bits of the RHS are zero.
     ValueBits AllZeroValueBits((uint64_t)0, ICmpOp0Bits->getSize());
-    ValueBits *CRCOutBitsMasked = nullptr;
+    PValueBits CRCOutBitsMasked;
     if (CRC.BitReversed) {
       // Masking out the LSB is equivalent to shifting right one if we're just
       // comparing all the other bits are zero.
-      CRCOutBitsMasked = ValueBits::LShr(ICmpOp0Bits, 1);
+      CRCOutBitsMasked = ValueBits::LShr(*ICmpOp0Bits, 1);
       CheckBit = ICmpOp0Bits->getBit(0);
     } else {
       // The CRC type might be larger than the data, so we can't shift left
       // one. Mask instead.
       uint64_t MSBMask = ~(1 << (CRC.Width - 1));
-      CRCOutBitsMasked = ValueBits::And(ICmpOp0Bits, MSBMask);
+      CRCOutBitsMasked = ValueBits::And(*ICmpOp0Bits, MSBMask);
       CheckBit = ICmpOp0Bits->getBit(CRC.Width - 1);
     }
     if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
@@ -3540,23 +3574,26 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
     return false;
   }
 
+  if (!CheckBit.has_value())
+    return false;
+
   // If there exists a Data input, ensure the check bit is crc^data.
-  ValueBits::ValueBit *RefCheckBit = nullptr;
+  std::optional<ValueBits::ValueBit> RefCheckBit;
   uint64_t CRCCheckIdx = CRC.BitReversed ? 0 : CRCSize - 1;
-  ValueBits::ValueBit *CRCInputRefBit =
+  ValueBits::ValueBit CRCInputRefBit =
       ValueBits::ValueBit::CreateRefBit(CRC.CRCInput, CRCCheckIdx);
   if (CRC.DataInput) {
     uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
     uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
-   ValueBits::ValueBit *DataInputRefBit =
-       ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+    ValueBits::ValueBit DataInputRefBit =
+        ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
     RefCheckBit =
-       ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+        ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
   } else {
     RefCheckBit = CRCInputRefBit;
   }
 
-  if (!RefCheckBit->equals(CheckBit)) {
+  if (!RefCheckBit.value().equals(CheckBit.value())) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE
                       << " CRCRecognize: Cannot verify check bit!\n"
                       << *RefCheckBit << "\n"
@@ -3564,37 +3601,40 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
     return false;
   }
 
-  ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
-  ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
-  if (CheckZero)
-    std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
+  PValueBits CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
+  PValueBits CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
+  if (CheckZero) {
+    PValueBits Tmp = CRCOutBitsIfOne;
+    CRCOutBitsIfOne = CRCOutBitsIfZero;
+    CRCOutBitsIfZero = Tmp;
+  }
 
   // Now construct ValueBits that would be the result of crc for one iteration.
   // That is, a shift and then xor if [M/L]SB is 1.
-  ValueBits *CRCValueBits = nullptr;
+  PValueBits CRCValueBits;
   Result = ValueMap.find(CRC.CRCInput);
   if (Result == ValueMap.end()) {
-    CRCValueBits = new ValueBits(CRC.CRCInput, CRCSize);
+    CRCValueBits = std::make_shared<ValueBits>(CRC.CRCInput, CRCSize);
   } else {
     CRCValueBits = Result->second;
   }
   uint64_t GeneratorPolynomial =
       CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
-  ValueBits Polynomial(GeneratorPolynomial, CRCSize);
+  PValueBits Polynomial = std::make_shared<ValueBits>(GeneratorPolynomial, CRCSize);
 
   // Case where the MSB/LSB of the data is 0
-  ValueBits *IfZero = CRC.BitReversed ? ValueBits::LShr(CRCValueBits, 1)
-                                      : ValueBits::Shl(CRCValueBits, 1);
+  PValueBits IfZero = CRC.BitReversed ? ValueBits::LShr(*CRCValueBits, 1)
+                                      : ValueBits::Shl(*CRCValueBits, 1);
 
   // Case where the MSB/LSB of the data is 1
-  ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
+  PValueBits IfOne = ValueBits::Xor(*IfZero, *Polynomial);
 
-  if (!IfZero->equals(CRCOutBitsIfZero)) {
+  if (!IfZero->equals(CRCOutBitsIfZero.get())) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
                       << *IfZero << *CRCOutBitsPred->getIfFalse());
     return false;
   }
-  if (!IfOne->equals(CRCOutBitsIfOne)) {
+  if (!IfOne->equals(CRCOutBitsIfOne.get())) {
     LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize: Not Equal!\n"
                       << *IfOne << *CRCOutBitsPred->getIfTrue());
     return false;

>From 09510f5009cf8b62952cab9754206bdd492862b9 Mon Sep 17 00:00:00 2001
From: Joe Faulls <joseph.faulls at imgtec.com>
Date: Wed, 24 Apr 2024 17:03:46 +0100
Subject: [PATCH 11/12] Format code correctly

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 050efcf66d7d31..ba733b2df8f5d5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -2955,7 +2955,7 @@ class ValueBits {
         _RHS = new ValueBit(*VB._RHS);
       }
     }
-    ValueBit& operator=(const ValueBit &VB) {
+    ValueBit &operator=(const ValueBit &VB) {
       _Type = VB._Type;
       if (_Type == BitType::REF)
         _BitRef = VB._BitRef;
@@ -3272,8 +3272,8 @@ symbolicallyExecute(BasicBlock *BB,
         }
       }
       assert(IncomingBlock);
-      ValueMap.insert({&I,
-          getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock))});
+      ValueMap.insert({&I, getOrCreateValueBits(
+                               PHI->getIncomingValueForBlock(IncomingBlock))});
     } break;
     case Instruction::Shl: {
       ConstantInt *CI = getConstantOperand(&I, 1);
@@ -3320,8 +3320,8 @@ symbolicallyExecute(BasicBlock *BB,
       }
       auto IfTrue = getOrCreateValueBits(Select->getTrueValue());
       auto IfFalse = getOrCreateValueBits(Select->getFalseValue());
-      ValueMap.insert({&I,
-          std::make_shared<PredicatedValueBits>(Cond, IfTrue, IfFalse)});
+      ValueMap.insert(
+          {&I, std::make_shared<PredicatedValueBits>(Cond, IfTrue, IfFalse)});
     } break;
     default:
       // If this instruction is not recognized, then just continue. This is
@@ -3456,7 +3456,7 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // any unexpected loop variant operations happening, e.g. additional select
   // logic or shifts, then this will be captured in the ValueBits.
   std::map<Value *, std::shared_ptr<ValueBits>> ValueMap;
-    
+
   if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
     return false;
 
@@ -3487,8 +3487,8 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // whether this is bit reversed CRC
   ICmpInst *ICmp = CRCOutBitsPred->getPredicate();
   CmpInst::Predicate Pred = ICmp->getPredicate();
-  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize checking to see if " << *ICmp
-                    << " is checking the "
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRecognize checking to see if "
+                    << *ICmp << " is checking the "
                     << (CRC.BitReversed ? "LSB\n" : "MSB\n"));
 
   // Firstly check the LHS is in our map, and RHS is a constant
@@ -3620,7 +3620,8 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   }
   uint64_t GeneratorPolynomial =
       CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
-  PValueBits Polynomial = std::make_shared<ValueBits>(GeneratorPolynomial, CRCSize);
+  PValueBits Polynomial =
+      std::make_shared<ValueBits>(GeneratorPolynomial, CRCSize);
 
   // Case where the MSB/LSB of the data is 0
   PValueBits IfZero = CRC.BitReversed ? ValueBits::LShr(*CRCValueBits, 1)

>From e781f6672d6779504d1426299cf437b5d30573ed Mon Sep 17 00:00:00 2001
From: Joe Faulls <joseph.faulls at imgtec.com>
Date: Thu, 25 Apr 2024 09:59:24 +0100
Subject: [PATCH 12/12] Guard ValueBit(s) printing with #ifndef NDEBUG

getNameOrAsOperand is only available in debug builds, so calling it
causes build failures when compiling in release mode.
---
 llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ba733b2df8f5d5..c7706f88682243 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -2999,6 +2999,7 @@ class ValueBits {
     }
 
     void print(raw_ostream &OS) {
+#ifndef NDEBUG
       switch (_Type) {
       case BitType::ONE:
         OS << "1";
@@ -3016,6 +3017,7 @@ class ValueBits {
         _RHS->print(OS);
         break;
       }
+#endif
     }
   };
   using PValueBits = std::shared_ptr<ValueBits>;
@@ -3153,6 +3155,7 @@ class ValueBits {
   }
 
   virtual void print(raw_ostream &OS) {
+#ifndef NDEBUG
     assert(Size != 0);
     OS << "[";
     Bits[Size - 1].print(OS);
@@ -3161,6 +3164,7 @@ class ValueBits {
       Bits[i].print(OS);
     }
     OS << "]\n";
+#endif
   }
 };
 
@@ -3218,9 +3222,11 @@ class PredicatedValueBits : public ValueBits {
   ICmpInst *getPredicate() { return _Predicate; }
 
   virtual void print(raw_ostream &OS) override {
+#ifndef NDEBUG
     OS << "Predicate: " << *_Predicate << "\nIf True:\n"
        << *_IfTrue << "If False:\n"
        << *_IfFalse;
+#endif
   }
 };
 



More information about the llvm-commits mailing list