[llvm] [LoopIdiomRecognizer] Implement CRC recognition (PR #79295)

Joe Faulls via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 24 06:28:13 PST 2024


https://github.com/joe-img created https://github.com/llvm/llvm-project/pull/79295

Recognizes CRC byte loops and replaces them with a table lookup.

Current limitations:
- Only works on byte loops
	The CRC can be of any width, but the data is limited to one byte, i.e. a loop with an iteration count of 8.
- Only works on single-block loops
	Most CRC loops will have been flattened into a single block with select instructions this far into the pipeline.

Both limitations were made in an effort to reduce complexity, especially for a first patch. The code can be extended fairly easily to overcome them.

Implementation details:
1) Check if the loop looks like CRC and extract some useful information
2) Symbolically execute one iteration of the loop's instructions to see what happens to our output value
3) Ensure the output value is predicated on the value of the MSB/LSB of our input
4) Construct an expected output value of one iteration of CRC using the extracted information from step one and compare
5) Construct a lookup table and replace the output value with a lookup

>From de44cae8d94920d3b7340fb0b541512845d121b7 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:36:04 +0000
Subject: [PATCH 1/7] [LoopIdiomRecognize] Implement function to extract CRC
 data from loops

This will check to see if a loop looks like CRC, not necessarily
guaranteeing that it is CRC.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 244 +++++++++++++++++-
 1 file changed, 235 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3721564890ddb4e..c21b6e6fe295682 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -135,6 +135,9 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
              "with -Os/-Oz"),
     cl::init(true), cl::Hidden);
 
+// Opt-in switch for the CRC loop idiom: runOnCountableLoop() only calls
+// recognizeCRC() when -recognize-crc is passed. Disabled by default and
+// hidden from the regular option listing.
+static cl::opt<bool> CRCRecognize("recognize-crc", cl::desc("CRC RECOGNIZE"),
+                                  cl::init(false), cl::Hidden);
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -186,6 +189,15 @@ class LoopIdiomRecognize {
             // handling.
   };
 
+  /// Facts extracted by looksLikeCRC() from a loop that structurally
+  /// resembles a bytewise CRC computation.
+  struct CRCInfo {
+    /// Value entering the followed header phi from outside the loop.
+    Value *CRCInput;
+    /// In-loop instruction whose value is live out through the exit block.
+    Value *CRCOutput;
+    /// Additional loop input that is not CRCInput (assumed to be the data);
+    /// null when the loop has no such input.
+    Value *DataInput;
+    /// Constant trip count of the loop (currently always 8).
+    uint64_t Width;
+    /// Constant xor'd inside the loop; bit-reversed over the CRC type's width
+    /// when BitReversed is set.
+    uint64_t Polynomial;
+    /// True when the loop shifts with lshr rather than shl.
+    bool BitReversed;
+  };
+
   /// \name Countable Loop Idiom Handling
   /// @{
 
@@ -242,6 +254,8 @@ class LoopIdiomRecognize {
 
   bool recognizeShiftUntilBitTest();
   bool recognizeShiftUntilZero();
+  std::optional<CRCInfo> looksLikeCRC(const SCEV *BECount);
+  bool recognizeCRC(const SCEV *BECount);
 
   /// @}
 };
@@ -298,13 +312,8 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
   ApplyCodeSizeHeuristics =
       L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
 
-  HasMemset = TLI->has(LibFunc_memset);
-  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
-  HasMemcpy = TLI->has(LibFunc_memcpy);
-
-  if (HasMemset || HasMemsetPattern || HasMemcpy)
-    if (SE->hasLoopInvariantBackedgeTakenCount(L))
-      return runOnCountableLoop();
+  if (SE->hasLoopInvariantBackedgeTakenCount(L))
+    return runOnCountableLoop();
 
   return runOnNoncountableLoop();
 }
@@ -329,6 +338,17 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
                     << "] Countable Loop %" << CurLoop->getHeader()->getName()
                     << "\n");
 
+  bool MadeChange = false;
+  if (CRCRecognize)
+    MadeChange |= recognizeCRC(BECount);
+
+  HasMemset = TLI->has(LibFunc_memset);
+  HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
+  HasMemcpy = TLI->has(LibFunc_memcpy);
+
+  if (!(HasMemset || HasMemsetPattern || HasMemcpy))
+    return MadeChange;
+
   // The following transforms hoist stores/memsets into the loop pre-header.
   // Give up if the loop has instructions that may throw.
   SimpleLoopSafetyInfo SafetyInfo;
@@ -336,8 +356,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
   if (SafetyInfo.anyBlockMayThrow())
     return false;
 
-  bool MadeChange = false;
-
   // Scan all the blocks in the loop that are not in subloops.
   for (auto *BB : CurLoop->getBlocks()) {
     // Ignore blocks in subloops.
@@ -2868,3 +2886,211 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
   ++NumShiftUntilZero;
   return MadeChange;
 }
+
+/// Mirror the low \p NumBits bits of \p Num: bit 0 moves to bit NumBits - 1,
+/// bit 1 to bit NumBits - 2, and so on. Bits of \p Num at or above \p NumBits
+/// are dropped. \p NumBits is expected to be at most 64.
+static uint64_t reverseBits(uint64_t Num, unsigned NumBits) {
+  uint64_t Reversed = 0;
+  // Walk the destination position down from the top while consuming the
+  // source from its least significant end.
+  unsigned DstPos = NumBits;
+  while (DstPos-- > 0) {
+    Reversed |= (Num & 1) << DstPos;
+    Num >>= 1;
+  }
+  return Reversed;
+}
+
+/// Try to recognize the current loop as a CRC computation.
+///
+/// At this stage the function only runs the structural screen
+/// (looksLikeCRC()) and logs what it found; it performs no transformation and
+/// therefore always returns false ("nothing changed").
+bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
+  // Step one: Check if the loop looks like crc, and extract some useful
+  // information for us to check
+  std::optional<CRCInfo> MaybeCRC = looksLikeCRC(BECount);
+  if (!MaybeCRC)
+    return false;
+  CRCInfo CRC = *MaybeCRC;
+
+  // Bit width of the CRC value's type; logged separately from CRC.Width,
+  // which holds the loop trip count.
+  uint64_t CRCSize = CRC.CRCInput->getType()->getScalarSizeInBits();
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Found potential CRCLoop "
+                    << *CurLoop << "\n"
+                    << "Input CRC: " << *CRC.CRCInput << "\n"
+                    << "Output CRC: " << *CRC.CRCOutput << "\n"
+                    << "GeneratorPolynomial: " << CRC.Polynomial << "\n"
+                    << "CRC Size: " << CRCSize << "\n"
+                    << "CRC Width: " << CRC.Width << "\n"
+                    << "Reversed: " << CRC.BitReversed << "\n");
+  // The data input is optional (null when the loop has a single input), so
+  // only dereference it when present.
+  if (CRC.DataInput) {
+    LLVM_DEBUG(dbgs() << "Data Input: " << *CRC.DataInput << "\n"
+                      << "Data Size: "
+                      << CRC.DataInput->getType()->getScalarSizeInBits()
+                      << "\n");
+  }
+
+  // No transformation is performed yet.
+  return false;
+}
+
+/// Structural screen for a bytewise CRC loop.
+///
+/// This does not prove that the loop computes a CRC; it only rejects loops
+/// that cannot be one and collects the values a later check needs.
+///
+/// \param BECount backedge-taken count of the current loop.
+/// \returns the extracted CRCInfo, or std::nullopt if the loop is not an
+/// innermost, single-block, single-exit loop with a constant trip count of 8
+/// and exactly one live-out value that is built from a shift by one and an xor
+/// with a constant.
+std::optional<LoopIdiomRecognize::CRCInfo>
+LoopIdiomRecognize::looksLikeCRC(const SCEV *BECount) {
+  // Initial checks to see if this loop looks like CRC:
+  // - Inner most loop
+  // - One block
+  // - One exit
+  // - Iteration count is 8
+
+  // Check if this is inner most loop
+  if (!CurLoop->isInnermost())
+    return std::nullopt;
+
+  // Since we are far enough in the optimization pipeline that small branches
+  // will have been folded into Select instructions, if we have branches we are
+  // unlikely to be CRC. To reduce complexity, only consider single-block loops
+  // for CRC recognition
+  if (CurLoop->getBlocks().size() > 1) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Loops with more than one"
+                      << " block are unsupported\n");
+    return std::nullopt;
+  }
+
+  // Ensure one exit block
+  const BasicBlock *ExitBlock = CurLoop->getExitBlock();
+  if (!ExitBlock)
+    return std::nullopt;
+
+  // Check iteration count is 8
+  const SCEV *TripCountSCEV =
+      SE->getTripCountFromExitCount(BECount, BECount->getType(), CurLoop);
+  const SCEVConstant *TripCountSCEVConst =
+      dyn_cast<SCEVConstant>(TripCountSCEV);
+  if (!TripCountSCEVConst)
+    return std::nullopt;
+  APInt TripCount = TripCountSCEVConst->getAPInt();
+  // Only support one byte CRC loops. Loops with tripcount 16 or 32 can also be
+  // CRC, but this is currently unsupported
+  if (TripCount != 8)
+    return std::nullopt;
+
+  // Ensure only one value that is live across the loop boundary, and track the
+  // operations on this value. This should include:
+  // 1) A phi with an initial value outside the loop
+  // 2) Shift operation
+  // 3) ICMP operation
+
+  // Ensure only one value is live across the loop boundary. LCSSA ensures any
+  // live values are captured in a PHI of the exit block.
+  Instruction *LoopOutput = nullptr;
+  for (const PHINode &ExitPhi : ExitBlock->phis()) {
+    for (const Use &IncomingUse : ExitPhi.incoming_values()) {
+      // Look through the Use to the value it refers to, exactly as
+      // AddAllInstOps below does, rather than casting the Use's address.
+      Instruction *IncomingUser = dyn_cast<Instruction>(IncomingUse.get());
+      if (!IncomingUser)
+        continue;
+      if (CurLoop->contains(IncomingUser)) {
+        // A second in-loop value escaping the loop disqualifies it.
+        if (LoopOutput)
+          return std::nullopt;
+        LoopOutput = IncomingUser;
+      }
+    }
+  }
+
+  if (!LoopOutput)
+    return std::nullopt;
+
+  // Queue every operand of I that is itself an instruction.
+  auto AddAllInstOps = [](Instruction *I,
+                          SmallVectorImpl<Instruction *> &Worklist) {
+    for (Use &Op : I->operands()) {
+      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
+      if (OpInst)
+        Worklist.push_back(OpInst);
+    }
+  };
+
+  // Follow this value in the loop
+  SmallVector<Instruction *, 4> Worklist;
+  SmallPtrSet<Instruction *, 4> Visited;
+  bool FoundIcmp = false;
+  BinaryOperator *CRCShift = nullptr;
+  ConstantInt *GeneratorPolynomial = nullptr;
+  Value *CRCInput = nullptr;
+  Worklist.push_back(LoopOutput);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (Visited.contains(I))
+      continue;
+    Visited.insert(I);
+    if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
+      FoundIcmp |= isa<ICmpInst>(Select->getCondition());
+      AddAllInstOps(Select, Worklist);
+    } else if (isa<ICmpInst>(I)) {
+      // Instead of tracking the condition and working out if it's based on
+      // MSB of crc/data, just greedily assume it will be and check later.
+      // The compare's own operands are deliberately not followed.
+      FoundIcmp = true;
+    } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I)) {
+      switch (BinOp->getOpcode()) {
+      default:
+        break;
+      case Instruction::Shl:
+      case Instruction::LShr: {
+        // This shift could be the data or the crc. Either way, the RHS should
+        // be constant one.
+        Instruction *ShLHS = dyn_cast<Instruction>(BinOp->getOperand(0));
+        ConstantInt *ShRHS = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+        if (!ShRHS || !ShLHS || ShRHS->getZExtValue() != 1)
+          return std::nullopt;
+        CRCShift = BinOp;
+        Worklist.push_back(ShLHS);
+        break;
+      }
+      case Instruction::Xor: {
+        // An xor with a constant right-hand side is taken to be the xor with
+        // the generator polynomial.
+        Value *XorRHS = BinOp->getOperand(1);
+        if (ConstantInt *RHSConst = dyn_cast<ConstantInt>(XorRHS))
+          GeneratorPolynomial = RHSConst;
+        AddAllInstOps(BinOp, Worklist);
+        break;
+      }
+      }
+    } else if (PHINode *PHI = dyn_cast<PHINode>(I)) {
+      for (BasicBlock *IncomingBlock : PHI->blocks()) {
+        Value *IncomingValue = PHI->getIncomingValueForBlock(IncomingBlock);
+
+        if (CurLoop->contains(IncomingBlock)) {
+          // Backedge value: keep following it inside the loop.
+          if (Instruction *IncomingI = dyn_cast<Instruction>(IncomingValue)) {
+            Worklist.push_back(IncomingI);
+          }
+        } else {
+          // Value entering from outside the loop: the CRC's initial value.
+          CRCInput = IncomingValue;
+        }
+      }
+    }
+  }
+
+  // NOTE: FoundIcmp is recorded above but is not part of this check.
+  if (!(CRCShift && GeneratorPolynomial && CRCInput)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Does not look like CRC\n");
+    return std::nullopt;
+  }
+
+  // The crc loop will have either one or two inputs depending on whether the
+  // data is xor'd inside or outside the loop. Assume any additional inputs that
+  // isn't our crc input is the data.
+  Value *DataInput = nullptr;
+  PHINode *InductionPhi = CurLoop->getInductionVariable(*SE);
+  for (const PHINode &EntryPhi : CurLoop->getHeader()->phis()) {
+    if (&EntryPhi == InductionPhi)
+      continue;
+    for (BasicBlock *BB : EntryPhi.blocks()) {
+      if (!CurLoop->contains(BB)) {
+        Value *IncomingVal = EntryPhi.getIncomingValueForBlock(BB);
+        if (IncomingVal != CRCInput) {
+          // Only allow exactly one additional input to the loop.
+          if (DataInput)
+            return std::nullopt;
+          DataInput = IncomingVal;
+        }
+      }
+    }
+  }
+
+  // A right shift means the CRC is computed in bit-reversed (LSB-first) form.
+  bool Reversed = CRCShift->getOpcode() == Instruction::LShr;
+
+  uint64_t Polynomial = GeneratorPolynomial->getZExtValue();
+  if (Reversed)
+    Polynomial =
+        reverseBits(Polynomial, CRCInput->getType()->getScalarSizeInBits());
+
+  CRCInfo CRC = {CRCInput,   LoopOutput, DataInput, TripCount.getZExtValue(),
+                 Polynomial, Reversed};
+
+  return std::optional<CRCInfo>{CRC};
+}

>From a3aaf1a78bab5f58cd390830421bac79e7f38e07 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:59:09 +0000
Subject: [PATCH 2/7] [LoopIdiomRecognize] Implement ValueBits class

This is a representation of a value's bits in terms of references to
other values' bits, or 1/0 if the bit is known. This allows symbolic
execution of bitwise instructions without knowing the exact values.

Example:

LLVM IR Value i8 %x:
[%x[7], %x[6], %x[5], %x[4], %x[3], %x[2], %x[1], %x[0]]

%shr = lshr i8 %x, 2
[ 0, 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2]]

%shl = shl i8 %shr, 1
[ 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2], 0]

%xor = xor i8 %shl, 0xb
[ 0, %x[7], %x[6], %x[5], %x[4]^1, %x[3], %x[2]^1, 1]
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 236 ++++++++++++++++++
 1 file changed, 236 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index c21b6e6fe295682..f3d8b04130ffa2f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -2896,6 +2896,242 @@ static uint64_t reverseBits(uint64_t Num, unsigned NumBits) {
   return Reversed;
 }
 
+class ValueBits {
+  // This is a representation of a value's bits in terms of references to
+  // other values' bits, or 1/0 if the bit is known. This allows symbolic
+  // execution of bitwise instructions without knowing the exact values.
+  //
+  // Bits are stored least significant first: Bits[0] is bit 0.
+  //
+  // Example:
+  //
+  // LLVM IR Value i8 %x:
+  // [%x[7], %x[6], %x[5], %x[4], %x[3], %x[2], %x[1], %x[0]]
+  //
+  // %shr = lshr i8 %x, 2
+  // [ 0, 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2]]
+  //
+  // %shl = shl i8 %shr, 1
+  // [ 0, %x[7], %x[6], %x[5], %x[4], %x[3], %x[2], 0]
+  //
+  // %xor = xor i8 %shl, 0xb
+  // [ 0, %x[7], %x[6], %x[5], %x[4]^1, %x[3], %x[2]^1, 1]
+  //
+  // NOTE: ValueBit objects are heap allocated, shared between copies of a
+  // ValueBits and never freed by this class.
+public:
+  /// A single symbolic bit: a known 0/1, a reference to one bit of an IR
+  /// value, or the xor of two other symbolic bits.
+  class ValueBit {
+  public:
+    enum BitType { ONE, ZERO, REF, XOR };
+
+  private:
+    BitType Kind;
+    // Referenced value and bit offset; only meaningful for REF bits.
+    std::pair<Value *, uint64_t> RefBit{nullptr, 0};
+    // Operands; only meaningful for XOR bits.
+    ValueBit *XorLHS = nullptr;
+    ValueBit *XorRHS = nullptr;
+
+    ValueBit(BitType Type) : Kind(Type) {}
+    ValueBit(BitType Type, std::pair<Value *, uint64_t> BitRef)
+        : Kind(Type), RefBit(BitRef) {}
+    ValueBit(BitType Type, ValueBit *LHS, ValueBit *RHS)
+        : Kind(Type), XorLHS(LHS), XorRHS(RHS) {}
+
+  public:
+    static ValueBit *CreateOneBit() { return new ValueBit(BitType::ONE); }
+    static ValueBit *CreateZeroBit() { return new ValueBit(BitType::ZERO); }
+    static ValueBit *CreateRefBit(Value *Ref, uint64_t Offset) {
+      return new ValueBit(BitType::REF, std::make_pair(Ref, Offset));
+    }
+    static ValueBit *CreateXORBit(ValueBit *LHS, ValueBit *RHS) {
+      return new ValueBit(BitType::XOR, LHS, RHS);
+    }
+    inline BitType getType() const { return Kind; }
+
+    /// Structural equality. XOR bits compare equal regardless of operand
+    /// order; no other algebraic simplification is attempted.
+    bool equals(ValueBit *RHS) const {
+      if (Kind != RHS->getType())
+        return false;
+      switch (Kind) {
+      case BitType::ONE:
+      case BitType::ZERO:
+        return true;
+      case BitType::REF:
+        return RefBit == RHS->RefBit;
+      case BitType::XOR:
+        return (XorLHS->equals(RHS->XorLHS) && XorRHS->equals(RHS->XorRHS)) ||
+               (XorLHS->equals(RHS->XorRHS) && XorRHS->equals(RHS->XorLHS));
+      }
+      return false;
+    }
+
+    void print(raw_ostream &OS) const {
+      switch (Kind) {
+      case BitType::ONE:
+        OS << "1";
+        break;
+      case BitType::ZERO:
+        OS << "0";
+        break;
+      case BitType::REF:
+        OS << RefBit.first->getNameOrAsOperand() << "[" << RefBit.second
+           << "]";
+        break;
+      case BitType::XOR:
+        XorLHS->print(OS);
+        OS << "^";
+        XorRHS->print(OS);
+        break;
+      }
+    }
+  };
+
+private:
+  // Zero-initialised so that objects built through the protected default
+  // constructor never expose an indeterminate size.
+  uint64_t Size = 0;
+  std::vector<ValueBit *> Bits;
+
+  // In-place primitives. The public static wrappers below apply them to a
+  // copy; subclasses override them to change how an operation is applied.
+
+  virtual void _Shl(uint64_t N) {
+    // Shifting by the full width or more leaves only zeros, so there is no
+    // point in iterating further than that.
+    for (N = std::min<uint64_t>(N, Bits.size()); N > 0; N--) {
+      Bits.insert(Bits.begin(), ValueBit::CreateZeroBit());
+      Bits.pop_back();
+    }
+  }
+  virtual void _LShr(uint64_t N) {
+    for (N = std::min<uint64_t>(N, Bits.size()); N > 0; N--) {
+      Bits.insert(Bits.end(), ValueBit::CreateZeroBit());
+      Bits.erase(Bits.begin());
+    }
+  }
+  virtual void _Xor(ValueBits *RHS) {
+    assert(Size == RHS->getSize());
+    for (unsigned I = 0; I < Size; I++) {
+      // Read both operands up front and replace the slot by index: erasing
+      // and re-inserting through an iterator would invalidate it and make the
+      // follow-up reads look at the wrong bit.
+      ValueBit *LHSBit = Bits[I];
+      ValueBit *RHSBit = RHS->getBit(I);
+      const ValueBit::BitType LHSType = LHSBit->getType();
+      switch (RHSBit->getType()) {
+      case ValueBit::BitType::ZERO:
+        // x ^ 0 == x
+        break;
+      case ValueBit::BitType::ONE:
+        // Fold against known bits, otherwise record the inversion.
+        if (LHSType == ValueBit::BitType::ZERO)
+          Bits[I] = ValueBit::CreateOneBit();
+        else if (LHSType == ValueBit::BitType::ONE)
+          Bits[I] = ValueBit::CreateZeroBit();
+        else
+          Bits[I] = ValueBit::CreateXORBit(LHSBit, ValueBit::CreateOneBit());
+        break;
+      case ValueBit::BitType::REF:
+      case ValueBit::BitType::XOR:
+        // 0 ^ y == y; anything else stays symbolic.
+        if (LHSType == ValueBit::BitType::ZERO)
+          Bits[I] = new ValueBit(*RHSBit);
+        else
+          Bits[I] = ValueBit::CreateXORBit(LHSBit, RHSBit);
+        break;
+      }
+    }
+  }
+  virtual void _ZExt(uint64_t ToSize) {
+    assert(ToSize > Size);
+    for (uint64_t I = 0; I < ToSize - Size; I++)
+      Bits.push_back(ValueBit::CreateZeroBit());
+    Size = ToSize;
+  }
+  virtual void _Trunc(uint64_t ToSize) {
+    assert(ToSize < Size);
+    Bits.erase(Bits.begin() + ToSize, Bits.end());
+    Size = ToSize;
+  }
+  virtual void _And(uint64_t RHS) {
+    // Only the low Size bits of the mask are consulted; a clear mask bit
+    // forces the corresponding bit to a known zero.
+    for (unsigned I = 0; I < Size; I++) {
+      if (!(RHS & 1))
+        Bits[I] = ValueBit::CreateZeroBit();
+      RHS >>= 1;
+    }
+  }
+
+protected:
+  ValueBits() = default;
+
+public:
+  /// Bits that each reference the matching bit of \p InitialVal.
+  ValueBits(Value *InitialVal, uint64_t BitLength) : Size(BitLength) {
+    Bits.reserve(BitLength);
+    for (unsigned i = 0; i < BitLength; i++)
+      Bits.push_back(ValueBit::CreateRefBit(InitialVal, i));
+  }
+  /// Known bits taken from the low \p BitLength bits of \p InitialVal.
+  ValueBits(uint64_t InitialVal, uint64_t BitLength) : Size(BitLength) {
+    Bits.reserve(BitLength);
+    for (unsigned i = 0; i < BitLength; i++) {
+      if (InitialVal & 0x1)
+        Bits.push_back(ValueBit::CreateOneBit());
+      else
+        Bits.push_back(ValueBit::CreateZeroBit());
+      InitialVal >>= 1;
+    }
+  }
+  // Objects are created and copied through base-class pointers, so make
+  // destruction through such a pointer well defined.
+  virtual ~ValueBits() = default;
+
+  uint64_t getSize() const { return Size; }
+  ValueBit *getBit(unsigned i) const { return Bits[i]; }
+
+  /// Heap-allocated copy of this object; the caller owns the result. The
+  /// individual ValueBit objects are shared with the original.
+  virtual ValueBits *copyBits() { return new ValueBits(*this); }
+
+  // Each operation below leaves its operands untouched and returns a new,
+  // caller-owned object holding the result.
+
+  static ValueBits *Shl(ValueBits *LHS, uint64_t N) {
+    ValueBits *Shifted = LHS->copyBits();
+    Shifted->_Shl(N);
+    return Shifted;
+  }
+  static ValueBits *LShr(ValueBits *LHS, uint64_t N) {
+    ValueBits *Shifted = LHS->copyBits();
+    Shifted->_LShr(N);
+    return Shifted;
+  }
+  static ValueBits *Xor(ValueBits *LHS, ValueBits *RHS) {
+    ValueBits *Xord = LHS->copyBits();
+    Xord->_Xor(RHS);
+    return Xord;
+  }
+  static ValueBits *ZExt(ValueBits *LHS, uint64_t ToSize) {
+    ValueBits *Zexted = LHS->copyBits();
+    Zexted->_ZExt(ToSize);
+    return Zexted;
+  }
+  static ValueBits *Trunc(ValueBits *LHS, uint64_t N) {
+    ValueBits *Trunced = LHS->copyBits();
+    Trunced->_Trunc(N);
+    return Trunced;
+  }
+  static ValueBits *And(ValueBits *LHS, uint64_t RHS) {
+    ValueBits *Anded = LHS->copyBits();
+    Anded->_And(RHS);
+    return Anded;
+  }
+
+  virtual bool isPredicated() { return false; }
+
+  /// Bitwise structural comparison; sizes must match.
+  virtual bool equals(ValueBits *RHS) {
+    if (Size != RHS->getSize())
+      return false;
+
+    for (unsigned I = 0; I < Size; I++)
+      if (!getBit(I)->equals(RHS->getBit(I)))
+        return false;
+
+    return true;
+  }
+
+  /// Print the bits most significant first, separated by " | ".
+  virtual void print(raw_ostream &OS) {
+    assert(Size != 0);
+    OS << "[";
+    Bits[Size - 1]->print(OS);
+    for (int i = Size - 2; i >= 0; i--) {
+      OS << " | ";
+      Bits[i]->print(OS);
+    }
+    OS << "]\n";
+  }
+};
+
+// Stream a whole ValueBits through its (virtual) print() method.
+inline raw_ostream &operator<<(raw_ostream &OS, ValueBits &VBS) {
+  VBS.print(OS);
+  return OS;
+}
+
+// Stream a single symbolic bit through its print() method.
+inline raw_ostream &operator<<(raw_ostream &OS, ValueBits::ValueBit &VB) {
+  VB.print(OS);
+  return OS;
+}
+
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful
   // information for us to check

>From 8b16af4d22052200173d4027e550a2388a1dc554 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 15:59:36 +0000
Subject: [PATCH 3/7] [LoopIdiomRecognize] Implement PredicatedValueBits

These are representative of select or phi instructions where the
bits depend on an icmp.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index f3d8b04130ffa2f..050a415ede38e25 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3131,6 +3131,56 @@ inline raw_ostream &operator<<(raw_ostream &OS, ValueBits::ValueBit &VB) {
   VB.print(OS);
   return OS;
 }
+class PredicatedValueBits : public ValueBits {
+  // Representative of select or phi instructions, where the bits depend on
+  // an icmp: the value is _IfTrue when _Predicate holds and _IfFalse
+  // otherwise.
+  //
+  // The base sub-object is default constructed, so the base class' own bit
+  // storage is not populated for predicated values; all state lives in the
+  // two arms below.
+private:
+  ICmpInst *_Predicate;
+  ValueBits *_IfTrue;
+  ValueBits *_IfFalse;
+
+  // Every in-place primitive is forwarded to both arms. The static ValueBits
+  // helpers return a new object for each arm, which then replaces the stored
+  // pointer; the previous arm objects are not freed here.
+  void _Shl(uint64_t N) override {
+    _IfTrue = ValueBits::Shl(_IfTrue, N);
+    _IfFalse = ValueBits::Shl(_IfFalse, N);
+  }
+  void _LShr(uint64_t N) override {
+    _IfTrue = ValueBits::LShr(_IfTrue, N);
+    _IfFalse = ValueBits::LShr(_IfFalse, N);
+  }
+  void _ZExt(uint64_t N) override {
+    _IfTrue = ValueBits::ZExt(_IfTrue, N);
+    _IfFalse = ValueBits::ZExt(_IfFalse, N);
+  }
+  void _And(uint64_t N) override {
+    _IfTrue = ValueBits::And(_IfTrue, N);
+    _IfFalse = ValueBits::And(_IfFalse, N);
+  }
+  void _Xor(ValueBits *RHS) override {
+    _IfTrue = ValueBits::Xor(_IfTrue, RHS);
+    _IfFalse = ValueBits::Xor(_IfFalse, RHS);
+  }
+  void _Trunc(uint64_t N) override {
+    _IfTrue = ValueBits::Trunc(_IfTrue, N);
+    _IfFalse = ValueBits::Trunc(_IfFalse, N);
+  }
+
+public:
+  // Stores the three pointers as given; no copies of the arms are made.
+  PredicatedValueBits(ICmpInst *Predicate, ValueBits *IfTrue,
+                      ValueBits *IfFalse)
+      : _Predicate(Predicate), _IfTrue(IfTrue), _IfFalse(IfFalse) {}
+
+  // Memberwise copy: the new object points at the same predicate and arms.
+  ValueBits *copyBits() override { return new PredicatedValueBits(*this); }
+  bool isPredicated() override { return true; }
+  ValueBits *getIfTrue() { return _IfTrue; }
+  ValueBits *getIfFalse() { return _IfFalse; }
+  ICmpInst *getPredicate() { return _Predicate; }
+
+  // Prints the predicate followed by both arms.
+  virtual void print(raw_ostream &OS) override {
+    OS << "Predicate: " << *_Predicate << "\nIf True:\n"
+       << *_IfTrue << "If False:\n"
+       << *_IfFalse;
+  }
+};
 
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful

>From a6839f0371f30e0d7df718a17cdf7acd18cd68bd Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:11:59 +0000
Subject: [PATCH 4/7] [LoopIdiomRecognize] Add function to symbolically execute
 basic block

The result is a map between llvm Values and their bit representations as
ValueBits.
---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 144 ++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 050a415ede38e25..531c328f681691c 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -90,6 +90,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <map>
 #include <utility>
 #include <vector>
 
@@ -3182,6 +3183,119 @@ class PredicatedValueBits : public ValueBits {
   }
 };
 
+// Execute the instructions in a basic block whilst mapping out Values to
+// ValueBits
+// Symbolically execute the instructions of \p BB once, in order, recording in
+// \p ValueMap the ValueBits computed for every instruction that is understood
+// (phi, shl, lshr, and, xor, zext, trunc, select).
+//
+// Returns false if an instruction of a supported opcode has a shape that
+// cannot be modelled (non-constant or out-of-range shift amount, non-constant
+// or over-wide and-mask, select on a non-icmp condition, phi with several
+// incoming blocks other than BB). Instructions of other opcodes are skipped.
+//
+// NOTE: the ValueBits created for operands that are not yet in the map are
+// not inserted into it and are not freed here.
+static bool symbolicallyExecute(BasicBlock *BB,
+                                std::map<Value *, ValueBits *> &ValueMap) {
+
+  // Operand is `unsigned` so that it is streamed as a number rather than as a
+  // character in the debug output.
+  auto getConstantOperand = [](Instruction *I, unsigned Operand) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(Operand));
+    if (!CI) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                        << " handle this operation with non-constant operand "
+                        << Operand << ":\n"
+                        << *I << "\n");
+    }
+    return CI;
+  };
+
+  // Constant shift amount of I, or nullopt when it is not a constant or is
+  // not smaller than the bit width. The amount is read as unsigned: reading
+  // it sign extended would turn e.g. `shl i8 %x, 255` into a shift by 2^64-1.
+  auto getShiftAmount =
+      [&getConstantOperand](Instruction *I) -> std::optional<uint64_t> {
+    ConstantInt *CI = getConstantOperand(I, 1);
+    if (!CI)
+      return std::nullopt;
+    if (CI->getValue().uge(I->getType()->getScalarSizeInBits())) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                        << " handle out-of-range shift amount:\n"
+                        << *I << "\n");
+      return std::nullopt;
+    }
+    return CI->getZExtValue();
+  };
+
+  // Bits already computed for Val, or a fresh description of it: known bits
+  // for a constant that fits in 64 bits, otherwise references to Val's bits.
+  auto getOrCreateValueBits = [&ValueMap](Value *Val) -> ValueBits * {
+    auto Result = ValueMap.find(Val);
+    if (Result != ValueMap.end())
+      return Result->second;
+    uint64_t Width = Val->getType()->getScalarSizeInBits();
+    if (auto *CI = dyn_cast<ConstantInt>(Val); CI && CI->getBitWidth() <= 64)
+      return new ValueBits(CI->getZExtValue(), Width);
+    return new ValueBits(Val, Width);
+  };
+
+  for (Instruction &I : *BB) {
+    uint64_t BitSize = I.getType()->getScalarSizeInBits();
+    switch (I.getOpcode()) {
+    case Instruction::PHI: {
+      PHINode *PHI = cast<PHINode>(&I);
+      // For a single iteration the phi takes the value coming from the one
+      // incoming block that is not BB itself.
+      const BasicBlock *IncomingBlock = nullptr;
+      for (const BasicBlock *Incoming : PHI->blocks()) {
+        if (Incoming != BB) {
+          if (IncomingBlock) {
+            LLVM_DEBUG(dbgs()
+                       << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                       << " handle loop with multiple entries" << I << "\n");
+            return false;
+          }
+          IncomingBlock = Incoming;
+        }
+      }
+      assert(IncomingBlock);
+      ValueMap[&I] =
+          getOrCreateValueBits(PHI->getIncomingValueForBlock(IncomingBlock));
+    } break;
+    case Instruction::Shl: {
+      std::optional<uint64_t> Amount = getShiftAmount(&I);
+      if (!Amount)
+        return false;
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::Shl(LHSBits, *Amount);
+    } break;
+    case Instruction::LShr: {
+      std::optional<uint64_t> Amount = getShiftAmount(&I);
+      if (!Amount)
+        return false;
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::LShr(LHSBits, *Amount);
+    } break;
+    case Instruction::And: {
+      ConstantInt *CI = getConstantOperand(&I, 1);
+      // The mask is handed over as a 64-bit integer, so wider masks cannot be
+      // represented.
+      if (!CI || CI->getBitWidth() > 64)
+        return false;
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::And(LHSBits, CI->getZExtValue());
+    } break;
+    case Instruction::Xor: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueBits *RHSBits = getOrCreateValueBits(I.getOperand(1));
+      ValueMap[&I] = ValueBits::Xor(LHSBits, RHSBits);
+    } break;
+    case Instruction::ZExt: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::ZExt(LHSBits, BitSize);
+    } break;
+    case Instruction::Trunc: {
+      ValueBits *LHSBits = getOrCreateValueBits(I.getOperand(0));
+      ValueMap[&I] = ValueBits::Trunc(LHSBits, BitSize);
+    } break;
+    case Instruction::Select: {
+      SelectInst *Select = cast<SelectInst>(&I);
+      ICmpInst *Cond = dyn_cast<ICmpInst>(Select->getCondition());
+      if (!Cond) {
+        LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Do not know how to"
+                          << " handle SelectInst with non-icmp condition: " << I
+                          << "\n");
+        return false;
+      }
+      ValueBits *IfTrue = getOrCreateValueBits(Select->getTrueValue());
+      ValueBits *IfFalse = getOrCreateValueBits(Select->getFalseValue());
+      ValueMap[&I] = new PredicatedValueBits(Cond, IfTrue, IfFalse);
+    } break;
+    default:
+      // If this instruction is not recognized, then just continue. This is
+      // okay because users of this will just reference it by value, which is
+      // conservative.
+      break;
+    }
+  }
+  return true;
+}
+
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful
   // information for us to check
@@ -3206,6 +3320,36 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
                       << "\n");
   }
 
+  // Symbolically execute one iteration of the loop to populate a map of
+  // Value's to their ValueBits, aka a representation of their bits in terms of
+  // 1's, 0's and references to other values' bits. If these match pre-computed
+  // crc values, then we can say it's doing crc.
+  std::map<Value *, ValueBits *> ValueMap;
+
+  if (!symbolicallyExecute(CurLoop->getHeader(), ValueMap))
+    return false;
+
+  auto Result = ValueMap.find(CRC.CRCOutput);
+  if (Result == ValueMap.end()) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE " CRCRegonize: Did not find CRC output"
+                      << " after symbolic execution\n");
+    return false;
+  }
+
+  ValueBits *CRCOutBits = Result->second;
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                    << " CRCRegonize: ValueBits for output crc value:\n"
+                    << *CRCOutBits);
+
+  // Check this value is predicated
+  if (!CRCOutBits->isPredicated()) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Output CRC ValueBits is not"
+                      << " predicated.\n");
+    return false;
+  }
+  PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
+
   return false;
 }
 

>From 002baf311271338d89fb565cf9803171574aa68d Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:15:44 +0000
Subject: [PATCH 5/7] [LoopIdiomRecognize] Check result of symbolic execution
 matches CRC

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 151 ++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 531c328f681691c..73714609025cc3d 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -3350,6 +3350,157 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   }
   PredicatedValueBits *CRCOutBitsPred = (PredicatedValueBits *)CRCOutBits;
 
+  // Need to check if the predicate is checking the MSB/LSB depending on
+  // whether this is bit reversed CRC
+  ICmpInst *ICmp = CRCOutBitsPred->getPredicate();
+  CmpInst::Predicate Pred = ICmp->getPredicate();
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize checking to see if " << *ICmp
+                    << " is checking the "
+                    << (CRC.BitReversed ? "LSB\n" : "MSB\n"));
+
+  // Firstly check the LHS is in our map, and RHS is a constant
+  ConstantInt *RHS = dyn_cast<ConstantInt>(ICmp->getOperand(1));
+  Result = ValueMap.find(ICmp->getOperand(0));
+  if (!RHS || (Result == ValueMap.end())) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Cannot determine ICmp operands\n");
+    return false;
+  }
+  ValueBits *ICmpOp0Bits = Result->second;
+
+  // Now match the following cases
+  // (LSB): icmp [ne/eq] %mcrc, [1/0], mcrc keeps only the LSB
+  // (MSB): icmp [ne/eq] %mcrc, [1 << (BitSize - 1)/0], mcrc keeps only the MSB
+  // (MSB): icmp [sgt/sge] %crc, [1/0]
+  // (MSB): icmp [slt/sle] %crc, [0/-1]
+  // And decide whether the check is checking for existence of 1 or 0
+  bool checkZero = false;
+  ValueBits::ValueBit *CheckBit = nullptr;
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_EQ: {
+    // Check RHS is checking only one bit.
+    uint64_t RHSNum = RHS->getZExtValue();
+    uint64_t MSBNum = 1 << (ICmpOp0Bits->getSize() - 1);
+    // LSB if BitReversed, MSB otherwise.
+    if (!(CRC.BitReversed && RHSNum == 1) &&
+        !(!CRC.BitReversed && RHSNum == MSBNum) && RHSNum != 0) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                        << " CRCRegonize: ICmp RHS is not checking [M/L]SB\n");
+      return false;
+    }
+    // Now to check if we already know all the other bits of the LHS are zero.
+    ValueBits AllZeroValueBits((uint64_t)0, ICmpOp0Bits->getSize());
+    ValueBits *CRCOutBitsMasked = nullptr;
+    if (CRC.BitReversed) {
+      // Masking out the LSB is equivalent to shifting right one if we're just
+      // comparing all the other bits are zero.
+      CRCOutBitsMasked = ValueBits::LShr(ICmpOp0Bits, 1);
+      CheckBit = ICmpOp0Bits->getBit(0);
+    } else {
+      // The CRC type might be larger than the data, so we can't shift left
+      // one. Mask instead.
+      uint64_t MSBMask = ~(1 << (CRC.Width - 1));
+      CRCOutBitsMasked = ValueBits::And(ICmpOp0Bits, MSBMask);
+      CheckBit = ICmpOp0Bits->getBit(CRC.Width - 1);
+    }
+    if (!CRCOutBitsMasked->equals(&AllZeroValueBits)) {
+      LLVM_DEBUG(
+          dbgs() << DEBUG_TYPE
+                 << " CRCRegonize: Cannot determine ICmp checks [M/L]SB\n");
+      return false;
+    }
+    checkZero = RHSNum == 0;
+    break;
+  }
+  case CmpInst::ICMP_SGT:
+  case CmpInst::ICMP_SGE:
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    checkZero = true;
+    [[fallthrough]];
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE: {
+    int64_t RHSNum = RHS->getSExtValue();
+    if (((Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SGE) &&
+         RHSNum != 0) ||
+        ((Pred == CmpInst::ICMP_SLE) && RHSNum != -1) ||
+        ((Pred == CmpInst::ICMP_SGT) && RHSNum != 1) ||
+        ((Pred == CmpInst::ICMP_ULT) && RHSNum != (1 << (CRC.Width - 1))) ||
+        ((Pred == CmpInst::ICMP_ULE) && RHSNum != (1 << (CRC.Width - 1)) - 1)) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                        << " CRCRegonize: ICmp RHS is not checking MSB\n");
+      return false;
+    }
+    CheckBit = ICmpOp0Bits->getBit(CRCSize - 1);
+    break;
+  }
+  default:
+    return false;
+  }
+
+  // If there exists a Data input, ensure the check bit is crc^data.
+  ValueBits::ValueBit *RefCheckBit = nullptr;
+  uint64_t CRCCheckIdx = CRC.BitReversed ? 0 : CRCSize - 1;
+  ValueBits::ValueBit *CRCInputRefBit =
+      ValueBits::ValueBit::CreateRefBit(CRC.CRCInput, CRCCheckIdx);
+  if (CRC.DataInput) {
+    uint64_t DataSize = CRC.DataInput->getType()->getScalarSizeInBits();
+    uint64_t DataCheckIdx = CRC.BitReversed ? 0 : DataSize - 1;
+    ValueBits::ValueBit *DataInputRefBit =
+        ValueBits::ValueBit::CreateRefBit(CRC.DataInput, DataCheckIdx);
+    RefCheckBit =
+        ValueBits::ValueBit::CreateXORBit(CRCInputRefBit, DataInputRefBit);
+  } else {
+    RefCheckBit = CRCInputRefBit;
+  }
+
+  if (!RefCheckBit->equals(CheckBit)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE
+                      << " CRCRegonize: Cannot verify check bit!\n"
+                      << *RefCheckBit << "\n"
+                      << *CheckBit << "\n");
+    return false;
+  }
+
+  ValueBits *CRCOutBitsIfOne = CRCOutBitsPred->getIfTrue();
+  ValueBits *CRCOutBitsIfZero = CRCOutBitsPred->getIfFalse();
+  if (checkZero)
+    std::swap(CRCOutBitsIfZero, CRCOutBitsIfOne);
+
+  // Now construct ValueBits that would be the result of crc for one iteration.
+  // That is, a shift and then xor if [M/L]SB is 1.
+  ValueBits *CRCValueBits = nullptr;
+  Result = ValueMap.find(CRC.CRCInput);
+  if (Result == ValueMap.end()) {
+    CRCValueBits = new ValueBits(CRC.CRCInput, CRCSize);
+  } else {
+    CRCValueBits = Result->second;
+  }
+  uint64_t GeneratorPolynomial =
+      CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
+  ValueBits Polynomial(GeneratorPolynomial, CRCSize);
+
+  // Case where the MSB/LSB of the data is 0
+  ValueBits *IfZero = CRC.BitReversed ? ValueBits::LShr(CRCValueBits, 1)
+                                      : ValueBits::Shl(CRCValueBits, 1);
+
+  // Case where the MSB/LSB of the data is 1
+  ValueBits *IfOne = ValueBits::Xor(IfZero, &Polynomial);
+
+  if (!IfZero->equals(CRCOutBitsIfZero)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+                      << *IfZero << *CRCOutBitsPred->getIfFalse());
+    return false;
+  }
+  if (!IfOne->equals(CRCOutBitsIfOne)) {
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: Not Equal!\n"
+                      << *IfOne << *CRCOutBitsPred->getIfTrue());
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
+
   return false;
 }
 

>From 01868bb972a32705576c20652455409c489ddcc8 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:17:13 +0000
Subject: [PATCH 6/7] [LoopIdiomRecognize] Write lookup table based CRC for
 one-byte data

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 96 ++++++++++++++++++-
 1 file changed, 95 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 73714609025cc3d..f20947daaed8d54 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -91,6 +91,7 @@
 #include <cassert>
 #include <cstdint>
 #include <map>
+#include <sstream>
 #include <utility>
 #include <vector>
 
@@ -257,6 +258,7 @@ class LoopIdiomRecognize {
   bool recognizeShiftUntilZero();
   std::optional<CRCInfo> looksLikeCRC(const SCEV *BECount);
   bool recognizeCRC(const SCEV *BECount);
+  void writeTableBasedCRCOneByte(CRCInfo &CRC);
 
   /// @}
 };
@@ -3296,6 +3298,96 @@ static bool symbolicallyExecute(BasicBlock *BB,
   return true;
 }
 
+/// Replace the result of a recognized one-byte CRC loop with a table lookup.
+///
+/// Builds the 256-entry table (one entry per value of the byte being divided)
+/// as a private constant global, emits the index computation and load at the
+/// top of the loop's exit block, and redirects the users of the exit PHI that
+/// carries CRC.CRCOutput to the loaded value. The loop itself is left alone.
+///
+/// \param CRC Description of the loop (inputs, output, polynomial, direction)
+///            as already verified by recognizeCRC.
+void LoopIdiomRecognize::writeTableBasedCRCOneByte(CRCInfo &CRC) {
+  BasicBlock *ExitBB = CurLoop->getExitBlock();
+  assert(ExitBB && "CRC loop is expected to have a single exit block");
+  IRBuilder<> Builder(ExitBB);
+  Builder.SetInsertPoint(ExitBB->getFirstNonPHI());
+  Type *CRCType = CRC.CRCInput->getType();
+  uint64_t CRCSize = CRCType->getScalarSizeInBits();
+  // Narrower CRCs would underflow the `CRCSize - 8` shifts used below.
+  assert(CRCSize >= 8 && CRCSize <= 64 && "Unsupported CRC width");
+
+  // SB is the bit tested at each division step. Shift a 64-bit one: with a
+  // plain int, `1 << 31` overflows and sign-extends, which corrupts the table
+  // of a non-reversed 32-bit CRC.
+  uint64_t SB = CRC.BitReversed ? 1 : (uint64_t(1) << (CRCSize - 1));
+  uint64_t Polynomial =
+      CRC.BitReversed ? reverseBits(CRC.Polynomial, CRCSize) : CRC.Polynomial;
+
+  // Run the eight division steps for every possible byte. Each entry is
+  // emitted as a constant of the CRC type, truncating bits shifted above
+  // CRCSize, so no raw byte buffer or host byte order is involved.
+  SmallVector<Constant *, 256> TableElts;
+  for (uint64_t Dividend = 0; Dividend < 256; Dividend++) {
+    uint64_t CurByte = CRC.BitReversed ? Dividend : Dividend << (CRCSize - 8);
+    for (unsigned Bit = 0; Bit < 8; Bit++) {
+      bool ApplyPoly = (CurByte & SB) != 0;
+      CurByte = CRC.BitReversed ? CurByte >> 1 : CurByte << 1;
+      if (ApplyPoly)
+        CurByte ^= Polynomial;
+    }
+    APInt Entry = APInt(64, CurByte).zextOrTrunc(CRCSize);
+    TableElts.push_back(ConstantInt::get(CRCType, Entry));
+  }
+
+  // Construct and add the table as a global variable
+  ArrayType *TableType = ArrayType::get(CRCType, 256);
+  Constant *ConstantArr = ConstantArray::get(TableType, TableElts);
+  std::stringstream TableNameSS;
+  TableNameSS << "crctable.i" << CRCSize << "." << CRC.Polynomial;
+  if (CRC.BitReversed)
+    TableNameSS << ".reversed";
+  GlobalVariable *CRCTableGlobal = new GlobalVariable(
+      TableType, true, GlobalVariable::LinkageTypes::PrivateLinkage,
+      ConstantArr, TableNameSS.str());
+  ExitBB->getModule()->insertGlobalVariable(CRCTableGlobal);
+
+  // Construct the IR to load from this table
+  Value *CRCOffset = CRC.CRCInput;
+  if (CRCSize > 8) {
+    // Get the next byte into position and truncate
+    if (!CRC.BitReversed)
+      CRCOffset = Builder.CreateLShr(CRCOffset, CRCSize - 8);
+    CRCOffset = Builder.CreateTrunc(CRCOffset, Builder.getInt8Ty());
+  }
+  if (CRC.DataInput) {
+    // Data size can be more than 8 due to extending
+    Value *Data = CRC.DataInput;
+    if (Data->getType()->getScalarSizeInBits() > 8)
+      Data = Builder.CreateTrunc(Data, Builder.getInt8Ty());
+    // Xor the data, offset into the table and load
+    CRCOffset = Builder.CreateXor(CRCOffset, Data);
+  }
+
+  // Zero-extend the i8 index so bytes >= 0x80 still select entries 128..255
+  CRCOffset = Builder.CreateZExt(CRCOffset, Builder.getInt32Ty());
+  Value *Gep = Builder.CreateInBoundsGEP(CRCType, CRCTableGlobal, {CRCOffset});
+  Value *CRCRes = Builder.CreateLoad(CRCType, Gep);
+  if (CRCSize > 8) {
+    // Shift out SB used for division and Xor the rest of the crc back in
+    Value *RestOfCRC = CRC.BitReversed ? Builder.CreateLShr(CRC.CRCInput, 8)
+                                       : Builder.CreateShl(CRC.CRCInput, 8);
+    CRCRes = Builder.CreateXor(RestOfCRC, CRCRes);
+  }
+
+  // Only an exit PHI whose sole incoming value is the CRC output is rewritten
+  for (PHINode &ExitPhi : ExitBB->phis()) {
+    if (ExitPhi.getNumIncomingValues() == 1 &&
+        ExitPhi.getIncomingValue(0) == CRC.CRCOutput)
+      ExitPhi.replaceAllUsesWith(CRCRes);
+  }
+}
+
 bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
   // Step one: Check if the loop looks like crc, and extract some useful
   // information for us to check
@@ -3501,7 +3593,9 @@ bool LoopIdiomRecognize::recognizeCRC(const SCEV *BECount) {
 
   LLVM_DEBUG(dbgs() << DEBUG_TYPE << " CRCRegonize: This looks like crc!\n");
 
-  return false;
+  writeTableBasedCRCOneByte(CRC);
+
+  return true;
 }
 
 std::optional<LoopIdiomRecognize::CRCInfo>

>From e8fe438ea23368b8726b1b59480fadb373d28925 Mon Sep 17 00:00:00 2001
From: "Joseph.Faulls" <Joseph.Faulls at imgtec.com>
Date: Fri, 12 Jan 2024 16:23:57 +0000
Subject: [PATCH 7/7] [LoopIdiomRecognize] Add unit tests for CRC idiom
 recognizer

---
 llvm/test/Transforms/LoopIdiom/crc/crc.ll     | 195 ++++++++++++++++++
 llvm/test/Transforms/LoopIdiom/crc/not-crc.ll | 113 ++++++++++
 2 files changed, 308 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopIdiom/crc/crc.ll
 create mode 100644 llvm/test/Transforms/LoopIdiom/crc/not-crc.ll

diff --git a/llvm/test/Transforms/LoopIdiom/crc/crc.ll b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
new file mode 100644
index 000000000000000..17c1313a4c7d589
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/crc/crc.ll
@@ -0,0 +1,195 @@
+; RUN: opt -passes=loop-idiom < %s -S -debug -recognize-crc 2>&1 | FileCheck %s
+
+; CRC 8 bit, data 8 bit: MSB-first bit loop (polynomial 29) nested in a loop over the input bytes
+; CHECK: GeneratorPolynomial: 29
+; CHECK: CRC Size: 8
+; CHECK: Reversed: 0
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define dso_local zeroext i8 @crc8_loop(ptr noundef %data, i32 noundef %length) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond.cleanup7, %entry
+  %crc.0 = phi i8 [ 0, %entry ], [ %crc.1.lcssa, %for.cond.cleanup7 ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc20, %for.cond.cleanup7 ]
+  %cmp = icmp ult i32 %i.0, %length
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  %crc.0.lcssa = phi i8 [ %crc.0, %for.cond ]
+  ret i8 %crc.0.lcssa
+
+for.body:                                         ; preds = %for.cond
+  %add.ptr = getelementptr inbounds i8, ptr %data, i32 %i.0
+  %0 = load i8, ptr %add.ptr, align 1
+  %xor29 = xor i8 %0, %crc.0 ; the data byte is folded into the crc before the bit loop
+  br label %for.body8
+
+for.cond.cleanup7:                                ; preds = %for.body8
+  %crc.1.lcssa = phi i8 [ %crc.2, %for.body8 ]
+  %inc20 = add i32 %i.0, 1
+  br label %for.cond
+
+for.body8:                                        ; preds = %for.body, %for.body8
+  %i3.032 = phi i32 [ 0, %for.body ], [ %inc, %for.body8 ]
+  %crc.131 = phi i8 [ %xor29, %for.body ], [ %crc.2, %for.body8 ]
+  %shl = shl i8 %crc.131, 1
+  %xor14 = xor i8 %shl, 29
+  %cmp10.not30 = icmp slt i8 %crc.131, 0 ; true when the MSB (sign bit) of the crc is set
+  %crc.2 = select i1 %cmp10.not30, i8 %xor14, i8 %shl
+  %inc = add nuw nsw i32 %i3.032, 1
+  %cmp5 = icmp ult i32 %inc, 8 ; the bit loop runs 8 times
+  br i1 %cmp5, label %for.body8, label %for.cond.cleanup7
+}
+
+; CRC16, 8 bit data, LSB-first: crc and data are both shifted right every iteration
+; CHECK: Input CRC: i16 %crc
+; CHECK: Output CRC:   %crc.addr.2
+; CHECK: GeneratorPolynomial: 32773
+; CHECK: CRC Size: 16
+; CHECK: Reversed: 1
+; CHECK: Data Input: i8 %data
+; CHECK: Data Size: 8
+define i16 @crc16_reversed(i8 %data, i16 %crc) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+  %data.addr.034 = phi i8 [ %data, %entry ], [ %1, %for.body ]
+  %0 = trunc i16 %crc.addr.035 to i8
+  %and33 = xor i8 %0, %data.addr.034
+  %xor = and i8 %and33, 1 ; keeps only the LSB of crc ^ data
+  %1 = lshr i8 %data.addr.034, 1
+  %cmp10.not = icmp eq i8 %xor, 0
+  %2 = lshr i16 %crc.addr.035, 1
+  %3 = xor i16 %2, -24575 ; -24575 is 0xA001, the 16-bit reversal of 32773 (0x8005)
+  %crc.addr.2 = select i1 %cmp10.not, i16 %2, i16 %3
+  %inc = add nuw nsw i8 %i.036, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+; CRC16 with the data byte xor'd into the top byte of the crc before (outside) the loop
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define dso_local zeroext i16 @crc16_xor_outside(i16 %crc, i8 %data) {
+entry:
+  %conv2 = zext i8 %data to i16
+  %shl = shl nuw i16 %conv2, 8 ; moves the data byte into bits 8..15
+  %xor = xor i16 %shl, %crc
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.020 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.019 = phi i16 [ %xor, %entry ], [ %crc.addr.1, %for.body ]
+  %shl7 = shl i16 %crc.addr.019, 1
+  %xor8 = xor i16 %shl7, 4129 ; 4129 is 0x1021
+  %tobool.not18 = icmp slt i16 %crc.addr.019, 0 ; true when the MSB (sign bit) of the crc is set
+  %crc.addr.1 = select i1 %tobool.not18, i16 %xor8, i16 %shl7
+  %inc = add nuw nsw i32 %i.020, 1
+  %cmp = icmp ult i32 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.1, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+; CRC size 32, LSB-first, with the data xor inside the 8-iteration bit loop, nested in a loop over the bytes
+; CHECK: GeneratorPolynomial: 270598144
+; CHECK: CRC Size: 32
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define i16 @crc32_reversed(ptr %data_p, i16 %length) {
+entry:
+  %cmp = icmp eq i16 %length, 0
+  br i1 %cmp, label %cleanup, label %do.body.preheader
+
+do.body.preheader:                                ; preds = %entry
+  br label %do.body
+
+do.body:                                          ; preds = %do.body.preheader, %do.cond
+  %data_p.addr.0 = phi ptr [ %incdec.ptr, %do.cond ], [ %data_p, %do.body.preheader ]
+  %length.addr.0 = phi i16 [ %dec, %do.cond ], [ %length, %do.body.preheader ]
+  %crc.0 = phi i32 [ %crc.1.lcssa, %do.cond ], [ 65535, %do.body.preheader ]
+  %incdec.ptr = getelementptr inbounds i8, ptr %data_p.addr.0, i64 1
+  %0 = load i8, ptr %data_p.addr.0, align 1
+  %conv3 = zext i8 %0 to i32
+  br label %for.body
+
+for.body:                                         ; preds = %do.body, %for.body
+  %crc.135 = phi i32 [ %crc.0, %do.body ], [ %crc.2, %for.body ]
+  %data.034 = phi i32 [ %conv3, %do.body ], [ %shr13, %for.body ]
+  %i.033 = phi i8 [ 0, %do.body ], [ %inc, %for.body ]
+  %and732 = xor i32 %crc.135, %data.034
+  %xor = and i32 %and732, 1 ; keeps only the LSB of crc ^ data
+  %tobool.not = icmp eq i32 %xor, 0
+  %shr = lshr i32 %crc.135, 1
+  %xor10 = xor i32 %shr, 33800 ; 33800 is 0x8408, the 32-bit reversal of 270598144 (0x10210000)
+  %crc.2 = select i1 %tobool.not, i32 %shr, i32 %xor10
+  %inc = add nuw nsw i8 %i.033, 1
+  %shr13 = lshr i32 %data.034, 1
+  %cmp5 = icmp ult i8 %inc, 8
+  br i1 %cmp5, label %for.body, label %do.cond
+
+do.cond:                                          ; preds = %for.body
+  %crc.1.lcssa = phi i32 [ %crc.2, %for.body ]
+  %dec = add i16 %length.addr.0, -1
+  %tobool14.not = icmp eq i16 %dec, 0
+  br i1 %tobool14.not, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.cond
+  %crc.1.lcssa.lcssa = phi i32 [ %crc.1.lcssa, %do.cond ]
+  %not15 = xor i32 %crc.1.lcssa.lcssa, -1
+  %shl = shl i32 %not15, 8
+  %shr16 = lshr i32 %not15, 8
+  %and17 = and i32 %shr16, 255
+  %or = add nuw nsw i32 %and17, %shl
+  %conv18 = trunc i32 %or to i16
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %do.end
+  %retval.0 = phi i16 [ %conv18, %do.end ], [ 0, %entry ]
+  ret i16 %retval.0
+}
+
+; CRC16, MSB-first, 8 bit data xor'd against the top byte of the crc inside the loop
+; CHECK: GeneratorPolynomial: 258
+; CHECK: CRC Size: 16
+; CHECK: Reversed: 0
+; CHECK: Data Size: 8
+; CHECK: loop-idiom CRCRegonize: This looks like crc!
+define signext i16 @crc16(i16 %crcValue, i8 %newByte) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+  %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+  %and = lshr i16 %crcValue.addr.015, 8 ; top byte of the crc
+  %conv2 = zext i8 %newByte.addr.016 to i16
+  %shr14 = xor i16 %conv2, %and
+  %xor = and i16 %shr14, 128 ; bit 7 of the xor, i.e. crc bit 15 ^ data bit 7
+  %tobool.not = icmp eq i16 %xor, 0
+  %shl = shl i16 %crcValue.addr.015, 1
+  %xor4 = xor i16 %shl, 258
+  %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+  %shl7 = shl i8 %newByte.addr.016, 1
+  %inc = add nuw nsw i8 %i.017, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+  ret i16 %crcValue.addr.0.lcssa
+}
+
+; CHECK: @crctable.i16.32773.reversed = private constant [256 x i16] [i16 0, i16 -16191, i16 -15999, i16 320
+; CHECK: @crctable.i16.4129 = private constant [256 x i16] [i16 0, i16 4129, i16 8258, i16 12387, i16 16516
+; CHECK: @crctable.i32.270598144.reversed = private constant [256 x i32] [i32 0, i32 4489, i32 8978, i32 12955
+; CHECK: @crctable.i16.258 = private constant [256 x i16] [i16 0, i16 258, i16 516, i16 774, i16 1032
diff --git a/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll b/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
new file mode 100644
index 000000000000000..3144ffa65243518
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/crc/not-crc.ll
@@ -0,0 +1,113 @@
+; RUN: opt -passes=loop-idiom < %s -S -debug -recognize-crc 2>&1 | FileCheck %s
+
+; crc16 incorrect xor inside loop: the tested bit comes from crc ^ 25 instead of crc ^ data
+; CHECK: loop-idiom CRCRegonize: Cannot verify check bit!
+; CHECK: crc[0]^data[0]
+; CHECK: crc[1]^1
+define i16 @crc16_incorrect_xor(i8 %data, i16 %crc) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+  %data.addr.034 = phi i8 [ %data, %entry ], [ %1, %for.body ]
+  %0 = trunc i16 %crc.addr.035 to i8
+  %and33 = xor i8 %0, 25 ; xor with the constant 25 rather than the data byte
+  %xor = and i8 %and33, 1
+  %1 = lshr i8 %data.addr.034, 1
+  %cmp10.not = icmp eq i8 %xor, 0
+  %2 = lshr i16 %crc.addr.035, 1
+  %3 = xor i16 %2, -24575
+  %crc.addr.2 = select i1 %cmp10.not, i16 %2, i16 %3
+  %inc = add nuw nsw i8 %i.036, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+; Two-bytes-at-a-time crc (16 bit data, 16 iterations) is not supported
+; CHECK-NOT: loop-idiom CRCRegonize: This looks like crc!
+define i16 @crc16_reversed_data16(i16 %data, i16 %crc) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.036 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %crc.addr.035 = phi i16 [ %crc, %entry ], [ %crc.addr.2, %for.body ]
+  %data.addr.034 = phi i16 [ %data, %entry ], [ %0, %for.body ]
+  %and33 = xor i16 %crc.addr.035, %data.addr.034
+  %xor = and i16 %and33, 1
+  %0 = lshr i16 %data.addr.034, 1
+  %cmp10.not = icmp eq i16 %xor, 0
+  %1 = lshr i16 %crc.addr.035, 1
+  %2 = xor i16 %1, -24575
+  %crc.addr.2 = select i1 %cmp10.not, i16 %1, i16 %2
+  %inc = add nuw nsw i8 %i.036, 1
+  %cmp = icmp ult i8 %inc, 16 ; 16 iterations rather than 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crc.addr.0.lcssa = phi i16 [ %crc.addr.2, %for.body ]
+  ret i16 %crc.addr.0.lcssa
+}
+
+
+; Two shifts per iteration (shl, then lshr). Check that the ValueBits are correctly found not to match
+; CHECK-NOT: loop-idiom CRCRegonize: This looks like crc!
+define signext i16 @crc16_doubleshift(i16 %crcValue, i8 %newByte) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+  %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+  %and = lshr i16 %crcValue.addr.015, 8
+  %conv2 = zext i8 %newByte.addr.016 to i16
+  %shr14 = xor i16 %conv2, %and
+  %xor = and i16 %shr14, 128
+  %tobool.not = icmp eq i16 %xor, 0
+  %shlone = shl i16 %crcValue.addr.015, 1
+  %shl = lshr i16 %shlone, 1 ; undoes the shift above, leaving the crc with its MSB cleared
+  %xor4 = xor i16 %shl, 258
+  %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+  %shl7 = shl i8 %newByte.addr.016, 1
+  %inc = add nuw nsw i8 %i.017, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+  ret i16 %crcValue.addr.0.lcssa
+}
+
+; CHECK: loop-idiom CRCRegonize: ICmp RHS is not checking [M/L]SB
+define signext i16 @crc16_not_check_sb(i16 %crcValue, i8 %newByte) { ; same loop shape as @crc16 except for the icmp constant
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.017 = phi i8 [ 0, %entry ], [ %inc, %for.body ]
+  %newByte.addr.016 = phi i8 [ %newByte, %entry ], [ %shl7, %for.body ]
+  %crcValue.addr.015 = phi i16 [ %crcValue, %entry ], [ %crcValue.addr.1, %for.body ]
+  %and = lshr i16 %crcValue.addr.015, 8
+  %conv2 = zext i8 %newByte.addr.016 to i16
+  %shr14 = xor i16 %conv2, %and
+  %xor = and i16 %shr14, 128
+  %tobool.not = icmp eq i16 %xor, 2 ; RHS 2 is neither 0 nor the masked bit (128)
+  %shl = shl i16 %crcValue.addr.015, 1
+  %xor4 = xor i16 %shl, 258
+  %crcValue.addr.1 = select i1 %tobool.not, i16 %shl, i16 %xor4
+  %shl7 = shl i8 %newByte.addr.016, 1
+  %inc = add nuw nsw i8 %i.017, 1
+  %cmp = icmp ult i8 %inc, 8
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %crcValue.addr.0.lcssa = phi i16 [ %crcValue.addr.1, %for.body ]
+  ret i16 %crcValue.addr.0.lcssa
+}



More information about the llvm-commits mailing list