[llvm] [HashRecognize] Introduce new analysis (PR #139120)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 06:13:56 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/139120
>From d12d3f7fa8c879762a372243ad8b42f72d8a1d26 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Fri, 25 Apr 2025 10:07:58 +0100
Subject: [PATCH 1/5] [HashRecognize] Introduce new analysis
Introduce a fresh analysis for recognizing polynomial hashes, with the
rationale that several targets have specific instructions to optimize
things like CRC and GHASH (eg. X86 and RISC-V crypto extension). We
limit the scope to polynomial hashes computed in a Galois field of
characteristic 2, since this class of operations can also be optimized
in the absence of target-specific instructions to use a lookup table.
At the moment, we only recognize the CRC algorithm.
---
llvm/include/llvm/Analysis/HashRecognize.h | 89 ++
llvm/lib/Analysis/CMakeLists.txt | 1 +
llvm/lib/Analysis/HashRecognize.cpp | 683 +++++++++++++++
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 2 +
.../HashRecognize/cyclic-redundancy-check.ll | 802 ++++++++++++++++++
.../gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 +
llvm/utils/update_analyze_test_checks.py | 7 +-
8 files changed, 1584 insertions(+), 2 deletions(-)
create mode 100644 llvm/include/llvm/Analysis/HashRecognize.h
create mode 100644 llvm/lib/Analysis/HashRecognize.cpp
create mode 100644 llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
diff --git a/llvm/include/llvm/Analysis/HashRecognize.h b/llvm/include/llvm/Analysis/HashRecognize.h
new file mode 100644
index 0000000000000..cc353836118a3
--- /dev/null
+++ b/llvm/include/llvm/Analysis/HashRecognize.h
@@ -0,0 +1,89 @@
+//===- HashRecognize.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the HashRecognize analysis, which identifies hash functions
+// that can be optimized using a lookup-table or with target-specific
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_HASHRECOGNIZE_H
+#define LLVM_ANALYSIS_HASHRECOGNIZE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/KnownBits.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+
+namespace llvm {
+/// A tuple of bits that are expected to be zero, number N of them expected to
+/// be zero, with a boolean indicating whether it's the top or bottom N bits
+/// expected to be zero.
+using ErrBits = std::tuple<KnownBits, unsigned, bool>;
+
+/// A custom std::array with 256 entries, that also has a print function.
+struct CRCTable : public std::array<APInt, 256> {
+ void print(raw_ostream &OS) const;
+};
+
+/// The structure that is returned when a polynomial algorithm was recognized by
+/// the analysis. Currently, only the CRC algorithm is recognized.
+struct PolynomialInfo {
+ unsigned TripCount;
+ const Value *LHS;
+ APInt RHS;
+ const Value *ComputedValue;
+ bool ByteOrderSwapped;
+ const Value *LHSAux;
+ PolynomialInfo(unsigned TripCount, const Value *LHS, const APInt &RHS,
+ const Value *ComputedValue, bool ByteOrderSwapped,
+ const Value *LHSAux = nullptr);
+};
+
+/// The analysis. Holds references to the loop under inspection and to
+/// ScalarEvolution; every query is a const member function.
+class HashRecognize {
+ const Loop &L;
+ ScalarEvolution &SE;
+
+public:
+ HashRecognize(const Loop &L, ScalarEvolution &SE);
+
+ // The main analysis entry point. Yields a PolynomialInfo when a CRC loop is
+ // recognized; otherwise an ErrBits or a StringRef describing the failure.
+ std::variant<PolynomialInfo, ErrBits, StringRef> recognizeCRC() const;
+
+ // Auxiliary entry point after analysis to interleave the generating
+ // polynomial and return a 256-entry CRC table.
+ CRCTable genSarwateTable(const APInt &GenPoly, bool ByteOrderSwapped) const;
+
+ // Writes a report of recognizeCRC() to OS; prints nothing for loops that are
+ // not innermost.
+ void print(raw_ostream &OS) const;
+};
+
+class HashRecognizePrinterPass
+ : public PassInfoMixin<HashRecognizePrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit HashRecognizePrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &);
+};
+
+class HashRecognizeAnalysis : public AnalysisInfoMixin<HashRecognizeAnalysis> {
+ friend AnalysisInfoMixin<HashRecognizeAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = HashRecognize;
+ Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index a17a75e6fbcac..b225345e825d9 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -77,6 +77,7 @@ add_llvm_component_library(LLVMAnalysis
FunctionPropertiesAnalysis.cpp
GlobalsModRef.cpp
GuardUtils.cpp
+ HashRecognize.cpp
HeatUtils.cpp
IRSimilarityIdentifier.cpp
IVDescriptors.cpp
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
new file mode 100644
index 0000000000000..ec624e5e7d7a5
--- /dev/null
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -0,0 +1,683 @@
+//===- HashRecognize.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The HashRecognize analysis recognizes unoptimized polynomial hash functions
+// with operations over a Galois field of characteristic 2, also called binary
+// fields, or GF(2^n): this class of hash functions can be optimized using a
+// lookup-table-driven implementation, or with target-specific instructions.
+// Examples:
+//
+// 1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2).
+// 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a
+// rolling hash polynomial division in GF(2).
+// 3. Rijndael MixColumns, a step in AES computation, which is a polynomial
+// multiplication in GF(2^8).
+// 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM),
+// which is a polynomial evaluation in GF(2^128).
+//
+// All of them use an irreducible generating polynomial of degree m,
+//
+// c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0
+//
+// where each coefficient c can take values in GF(2^n), where 2^n is termed
+// the order of the Galois field. For GF(2), each coefficient can take values
+// either 0 or 1, and the polynomial is simply represented by m+1 bits,
+// corresponding to the coefficients. The different variants of CRC are named by
+// degree of generating polynomial used: so CRC-32 would use a polynomial of
+// degree 32.
+//
+// The reason algorithms on GF(2^n) can be optimized with a lookup-table is the
+// following: in such fields, polynomial addition and subtraction are identical
+// and equivalent to XOR, polynomial multiplication is an AND, and polynomial
+// division is identity: the XOR and AND operations in unoptimized
+// implementations are performed bit-wise, and can be optimized to be performed
+// chunk-wise, by interleaving copies of the generating polynomial, and storing
+// the pre-computed values in a table.
+//
+// A generating polynomial of m bits always has the MSB set, so we usually
+// omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial:
+//
+// (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021
+//
+// Transmissions are either in big-endian or little-endian form, and hash
+// algorithms are written according to this. For example, IEEE 802 and RS-232
+// specify little-endian transmission.
+//
+//===----------------------------------------------------------------------===//
+//
+// At the moment, we only recognize the CRC algorithm.
+// Documentation on CRC32 from the kernel:
+// https://www.kernel.org/doc/Documentation/crc32.txt
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/HashRecognize.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+using namespace SCEVPatternMatch;
+
+#define DEBUG_TYPE "hash-recognize"
+
+// KnownBits for a PHI node. There are at most two PHI nodes, corresponding to
+// the Simple Recurrence and Conditional Recurrence. The IndVar PHI is not
+// relevant.
+using KnownPhiMap = SmallDenseMap<const PHINode *, KnownBits, 2>;
+
+// A pair of a PHI node along with its incoming value from within a loop.
+using PhiStepPair = std::pair<const PHINode *, const Instruction *>;
+
+/// A much simpler version of ValueTracking, in that it computes KnownBits of
+/// values, except that it computes the evolution of KnownBits in a loop with a
+/// given trip count, and predication is specialized for a significant-bit
+/// check.
+class ValueEvolution {
+ // Number of loop iterations to simulate in computeEvolutions.
+ unsigned TripCount;
+ // Selects which significant-bit check is expected: the most-significant bit
+ // when true, the least-significant bit otherwise.
+ bool ByteOrderSwapped;
+ // NOTE(review): not referenced by any member function in this file; looks
+ // like a leftover -- confirm before removing.
+ APInt GenPoly;
+ // Last error recorded while computing; empty when no error occurred.
+ StringRef ErrStr;
+ // Iteration currently being simulated. NOTE(review): only ever written (in
+ // computeEvolutions), never read in this file -- confirm it is needed.
+ unsigned AtIteration;
+
+ // Helpers computing KnownBits for a binary operator, an arbitrary
+ // instruction, and an arbitrary value respectively. KnownPhis carries the
+ // KnownBits recorded for each PHI so far. On unsupported input they set
+ // ErrStr and return a KnownBits constructed from the bit width alone.
+ KnownBits computeBinOp(const BinaryOperator *I, const KnownPhiMap &KnownPhis);
+ KnownBits computeInstr(const Instruction *I, const KnownPhiMap &KnownPhis);
+ KnownBits compute(const Value *V, const KnownPhiMap &KnownPhis);
+
+public:
+ ValueEvolution(unsigned TripCount, bool ByteOrderSwapped);
+
+ // In case ValueEvolution encounters an error, these are meant to be used for
+ // a precise error message.
+ bool hasError() const;
+ StringRef getError() const;
+
+ // Given a list of PHI nodes along with their incoming value from within the
+ // loop, and the trip-count of the loop, computeEvolutions
+ // computes the KnownBits of each of the PHI nodes on the final iteration.
+ std::optional<KnownPhiMap>
+ computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions);
+};
+
+ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
+ : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {}
+
+bool ValueEvolution::hasError() const { return !ErrStr.empty(); }
+StringRef ValueEvolution::getError() const { return ErrStr; }
+
+/// Compute the KnownBits of BinaryOperator \p I.
+KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I,
+ const KnownPhiMap &KnownPhis) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+
+ KnownBits KnownL(compute(I->getOperand(0), KnownPhis));
+ KnownBits KnownR(compute(I->getOperand(1), KnownPhis));
+
+ switch (I->getOpcode()) {
+ case Instruction::BinaryOps::And:
+ return KnownL & KnownR;
+ case Instruction::BinaryOps::Or:
+ return KnownL | KnownR;
+ case Instruction::BinaryOps::Xor:
+ return KnownL ^ KnownR;
+ case Instruction::BinaryOps::Shl: {
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return KnownBits::shl(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
+ OBO->hasNoSignedWrap());
+ }
+ case Instruction::BinaryOps::LShr:
+ return KnownBits::lshr(KnownL, KnownR);
+ case Instruction::BinaryOps::AShr:
+ return KnownBits::ashr(KnownL, KnownR);
+ case Instruction::BinaryOps::Add: {
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return KnownBits::add(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
+ OBO->hasNoSignedWrap());
+ }
+ case Instruction::BinaryOps::Sub: {
+ auto *OBO = cast<OverflowingBinaryOperator>(I);
+ return KnownBits::sub(KnownL, KnownR, OBO->hasNoUnsignedWrap(),
+ OBO->hasNoSignedWrap());
+ }
+ case Instruction::BinaryOps::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ bool SelfMultiply = Op0 == Op1 && isGuaranteedNotToBeUndef(Op0);
+ return KnownBits::mul(KnownL, KnownR, SelfMultiply);
+ }
+ case Instruction::BinaryOps::UDiv:
+ return KnownBits::udiv(KnownL, KnownR);
+ case Instruction::BinaryOps::SDiv:
+ return KnownBits::sdiv(KnownL, KnownR);
+ case Instruction::BinaryOps::URem:
+ return KnownBits::urem(KnownL, KnownR);
+ case Instruction::BinaryOps::SRem:
+ return KnownBits::srem(KnownL, KnownR);
+ default:
+ ErrStr = "Unknown BinaryOperator";
+ return {BitWidth};
+ }
+}
+
+/// Compute the KnownBits of Instruction \p I.
+///
+/// - A PHI is looked up in \p KnownPhis; when it has no entry, a KnownBits
+///   constructed from the bit width alone is returned.
+/// - A select on an integer compare is resolved to the KnownBits of exactly
+///   one of its arms, chosen by matching the compare against the expected
+///   (least|most)-significant-bit check; a compare of any other shape records
+///   an error.
+/// - Binary operators are forwarded to computeBinOp, and trunc/zext/sext are
+///   applied to the KnownBits of their operand.
+///
+/// On any unsupported instruction, ErrStr is set and a KnownBits constructed
+/// from the bit width alone is returned.
+KnownBits ValueEvolution::computeInstr(const Instruction *I,
+                                       const KnownPhiMap &KnownPhis) {
+  unsigned BitWidth = I->getType()->getScalarSizeInBits();
+
+  // We look up in the map that contains the KnownBits of the PHI from the
+  // previous iteration.
+  if (const PHINode *P = dyn_cast<PHINode>(I))
+    return KnownPhis.lookup_or(P, BitWidth);
+
+  // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch
+  // that is predicated on the (least|most)-significant-bit check.
+  CmpPredicate Pred;
+  Value *L, *R, *TV, *FV;
+  if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Value(TV),
+                        m_Value(FV)))) {
+    // All four operands are evaluated up front: each compute() call may
+    // record an error in ErrStr, regardless of which arm is finally chosen.
+    KnownBits KnownL = compute(L, KnownPhis).zextOrTrunc(BitWidth);
+    KnownBits KnownR = compute(R, KnownPhis).zextOrTrunc(BitWidth);
+    KnownBits KnownTV = compute(TV, KnownPhis);
+    KnownBits KnownFV = compute(FV, KnownPhis);
+    auto LCR = ConstantRange::fromKnownBits(KnownL, false);
+    auto RCR = ConstantRange::fromKnownBits(KnownR, false);
+
+    // We need to check LCR against [0, 2) in the little-endian case, because
+    // the RCR check is insufficient: it is simply [0, 1).
+    auto CheckLCR = ConstantRange(APInt::getZero(BitWidth), APInt(BitWidth, 2));
+    if (!ByteOrderSwapped && LCR != CheckLCR) {
+      ErrStr = "Bad LHS of significant-bit-check";
+      return {BitWidth};
+    }
+
+    // Check that the predication is on (most|least) significant bit. The
+    // allowed region for the predicate either equals the expected range (the
+    // true arm is taken), or its inverse does (the false arm is taken).
+    auto AllowedR = ConstantRange::makeAllowedICmpRegion(Pred, RCR);
+    ConstantRange LSBRange(APInt::getZero(BitWidth), APInt(BitWidth, 1));
+    ConstantRange MSBRange(APInt::getZero(BitWidth),
+                           APInt::getSignedMinValue(BitWidth));
+    const ConstantRange &CheckRCR = ByteOrderSwapped ? MSBRange : LSBRange;
+    if (AllowedR == CheckRCR)
+      return KnownTV;
+    if (AllowedR.inverse() == CheckRCR)
+      return KnownFV;
+
+    ErrStr = "Bad RHS of significant-bit-check";
+    return {BitWidth};
+  }
+
+  if (auto *BO = dyn_cast<BinaryOperator>(I))
+    return computeBinOp(BO, KnownPhis);
+
+  switch (I->getOpcode()) {
+  case Instruction::CastOps::Trunc:
+    return compute(I->getOperand(0), KnownPhis).trunc(BitWidth);
+  case Instruction::CastOps::ZExt:
+    return compute(I->getOperand(0), KnownPhis).zext(BitWidth);
+  case Instruction::CastOps::SExt:
+    return compute(I->getOperand(0), KnownPhis).sext(BitWidth);
+  default:
+    ErrStr = "Unknown Instruction";
+    return {BitWidth};
+  }
+}
+
+/// Compute the KnownBits of Value \p V.
+KnownBits ValueEvolution::compute(const Value *V,
+ const KnownPhiMap &KnownPhis) {
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+
+ const APInt *C;
+ if (match(V, m_APInt(C)))
+ return KnownBits::makeConstant(*C);
+
+ if (auto *I = dyn_cast<Instruction>(V))
+ return computeInstr(I, KnownPhis);
+
+ ErrStr = "Unknown Value";
+ return {BitWidth};
+}
+
+// Takes every PHI-step pair in PhiEvolutions, and computes KnownBits on the
+// final iteration, using KnownBits from the previous iteration.
+//
+// Returns std::nullopt only when the iteration count exceeds the bit width of
+// a computed step. Errors recorded by computeInstr do not stop the simulation
+// and the map is still returned, so callers must consult hasError() before
+// trusting the result.
+std::optional<KnownPhiMap>
+ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) {
+ KnownPhiMap KnownPhis;
+ for (unsigned I = 0; I < TripCount; ++I) {
+ AtIteration = I;
+ // KnownPhis is updated in place, so a later pair in the same iteration
+ // observes the value just stored for an earlier pair.
+ for (auto [Phi, Step] : PhiEvolutions) {
+ KnownBits KnownAtIter = computeInstr(Step, KnownPhis);
+ // Iteration I (zero-based) needs a value that is at least I + 1 bits wide.
+ if (KnownAtIter.getBitWidth() < I + 1) {
+ ErrStr = "Loop iterations exceed bitwidth of result";
+ return std::nullopt;
+ }
+ KnownPhis.emplace_or_assign(Phi, KnownAtIter);
+ }
+ }
+ return KnownPhis;
+}
+
+/// Digs for a recurrence starting with \p V hitting the PHI node \p P in a
+/// use-def chain. Used by matchConditionalRecurrence.
+///
+/// Returns the first BinaryOperator found that has \p P as one of its
+/// operands, or nullptr when the walk is exhausted without finding one. While
+/// walking, an instruction with opcode \p BOWithConstOpToMatch has its
+/// constant operand (if any) bound to \p ExtraConst; if \p ExtraConst is
+/// already bound when such an instruction is visited, the walk is abandoned
+/// and nullptr is returned. Only instructions contained in \p L are followed.
+static BinaryOperator *
+digRecurrence(Instruction *V, const PHINode *P, const Loop &L,
+ const APInt *&ExtraConst,
+ Instruction::BinaryOps BOWithConstOpToMatch) {
+ SmallVector<Instruction *> Worklist;
+ Worklist.push_back(V);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+
+ // Don't add a PHI's operands to the Worklist.
+ if (isa<PHINode>(I))
+ continue;
+
+ // Find a recurrence over a BinOp, by matching either of its operands
+ // with the PHINode.
+ if (match(I, m_c_BinOp(m_Value(), m_Specific(P))))
+ return cast<BinaryOperator>(I);
+
+ // Bind to ExtraConst, if we match exactly one. A second instruction with
+ // the same opcode after ExtraConst has been bound aborts the search.
+ if (I->getOpcode() == BOWithConstOpToMatch) {
+ if (ExtraConst)
+ return nullptr;
+ match(I, m_c_BinOp(m_APInt(ExtraConst), m_Value()));
+ }
+
+ // Continue along the use-def chain.
+ for (Use &U : I->operands())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (L.contains(UI))
+ Worklist.push_back(UI);
+ }
+ return nullptr;
+}
+
+/// A Conditional Recurrence is a recurrence of the form:
+///
+/// loop:
+/// %rec = [%start, %entry], [%step, %loop]
+/// ...
+/// %step = select _, %tv, %fv
+///
+/// where %tv and %fv ultimately end up using %rec via the same %BO instruction,
+/// after digging through the use-def chain.
+///
+/// \p ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when
+/// digging the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is
+/// matched, and \p ExtraConst is a constant operand of that BinOp. This
+/// peculiarity exists because, in a CRC algorithm, \p BOWithConstOpToMatch is
+/// an XOR, and \p ExtraConst ends up being the generating polynomial.
+static bool matchConditionalRecurrence(
+ const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step,
+ const Loop &L, const APInt *&ExtraConst,
+ Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd) {
+ if (P->getNumIncomingValues() != 2)
+ return false;
+
+ for (unsigned Idx = 0; Idx != 2; ++Idx) {
+ Value *FoundStep = P->getIncomingValue(Idx);
+ Value *FoundStart = P->getIncomingValue(!Idx);
+
+ Instruction *TV, *FV;
+ if (!match(FoundStep,
+ m_Select(m_Cmp(), m_Instruction(TV), m_Instruction(FV))))
+ continue;
+
+ // For a conditional recurrence, both the true and false values of the
+ // select must ultimately end up in the same recurrent BinOp.
+ ExtraConst = nullptr;
+ BinaryOperator *FoundBO =
+ digRecurrence(TV, P, L, ExtraConst, BOWithConstOpToMatch);
+ BinaryOperator *AltBO =
+ digRecurrence(FV, P, L, ExtraConst, BOWithConstOpToMatch);
+
+ if (!FoundBO || FoundBO != AltBO)
+ return false;
+
+ if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) {
+ LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp "
+ "with constant in conditional recurrence\n");
+ return false;
+ }
+
+ BO = FoundBO;
+ Start = FoundStart;
+ Step = FoundStep;
+ return true;
+ }
+ return false;
+}
+
+/// A structure that can hold either a Simple Recurrence or a Conditional
+/// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand
+/// of the BO, while in a Conditional Recurrence, it is a SelectInst.
+struct RecurrenceInfo {
+ PHINode *Phi;
+ BinaryOperator *BO;
+ Value *Start;
+ Value *Step;
+ std::optional<APInt> ExtraConst;
+
+ RecurrenceInfo(PHINode *Phi, BinaryOperator *BO, Value *Start, Value *Step,
+ std::optional<APInt> ExtraConst = std::nullopt)
+ : Phi(Phi), BO(BO), Start(Start), Step(Step), ExtraConst(ExtraConst) {}
+
+ void print(raw_ostream &OS, unsigned Indent) const {
+ OS.indent(Indent) << "Phi: ";
+ Phi->print(OS);
+ OS << "\n";
+ OS.indent(Indent) << "BinaryOperator: ";
+ BO->print(OS);
+ OS << "\n";
+ OS.indent(Indent) << "Start: ";
+ Start->print(OS);
+ OS << "\n";
+ OS.indent(Indent) << "Step: ";
+ Step->print(OS);
+ OS << "\n";
+ if (ExtraConst) {
+ OS.indent(Indent) << "ExtraConst: ";
+ ExtraConst->print(OS, false);
+ OS << "\n";
+ }
+ }
+};
+
+/// Iterates over all the phis in \p LoopLatch, and attempts to extract a Simple
+/// Recurrence, and a Conditional Recurrence.
+static std::pair<std::optional<RecurrenceInfo>, std::optional<RecurrenceInfo>>
+getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) {
+ std::optional<RecurrenceInfo> SimpleRecurrence, ConditionalRecurrence;
+ for (PHINode &P : LoopLatch->phis()) {
+ if (&P == IndVar)
+ continue;
+ if (!P.getType()->isIntegerTy()) {
+ LLVM_DEBUG(dbgs() << "HashRecognize: Non-integral PHI found\n");
+ return {};
+ }
+
+ BinaryOperator *BO;
+ Value *Start, *Step;
+ const APInt *GenPoly;
+ if (!SimpleRecurrence && matchSimpleRecurrence(&P, BO, Start, Step)) {
+ SimpleRecurrence = {&P, BO, Start, Step};
+ } else if (!ConditionalRecurrence &&
+ matchConditionalRecurrence(&P, BO, Start, Step, L, GenPoly,
+ Instruction::BinaryOps::Xor)) {
+ ConditionalRecurrence = {&P, BO, Start, Step, *GenPoly};
+ } else {
+ LLVM_DEBUG(dbgs() << "HashRecognize: Stray PHI found: " << P << "\n");
+ return {};
+ }
+ }
+ return {SimpleRecurrence, ConditionalRecurrence};
+}
+
+PolynomialInfo::PolynomialInfo(unsigned TripCount, const Value *LHS,
+ const APInt &RHS, const Value *ComputedValue,
+ bool ByteOrderSwapped, const Value *LHSAux)
+ : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue),
+ ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {}
+
+/// Checks N bits of \p Known with \p CheckFn and requires every remaining bit
+/// to be unknown. In the big-endian case (\p ByteOrderSwapped), the bottom N
+/// bits are checked; in the little-endian case, the top N bits are. When N
+/// equals the bit width there are no remaining bits, and only \p CheckFn is
+/// applied.
+static bool checkExtractBits(const KnownBits &Known, unsigned N,
+ function_ref<bool(const KnownBits &)> CheckFn,
+ bool ByteOrderSwapped) {
+ // BitPos is where the N checked bits start; SwappedBitPos is where the
+ // remaining (BitWidth - N) bits start.
+ unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N;
+ unsigned SwappedBitPos = ByteOrderSwapped ? N : 0;
+
+ // All bits are covered by the check: apply CheckFn to the whole value.
+ if (N == Known.getBitWidth())
+ return CheckFn(Known.extractBits(N, 0));
+
+ // Check the selected N bits with CheckFn, and that the remaining
+ // (BitWidth - N) bits at the opposite end are all unknown.
+ return CheckFn(Known.extractBits(N, BitPos)) &&
+ Known.extractBits(Known.getBitWidth() - N, SwappedBitPos).isUnknown();
+}
+
+/// Generate a lookup table of 256 entries by interleaving the generating
+/// polynomial. The optimization technique of table-lookup for CRC is also
+/// called the Sarwate algorithm.
+///
+/// \param GenPoly the generating polynomial; its bit width is the width of
+/// every table entry.
+/// \param ByteOrderSwapped selects the big-endian construction (shift left,
+/// test the top bit) when true, and the little-endian construction (shift
+/// right, test the bottom bit) otherwise.
+/// \returns the populated 256-entry table; entry 0 is always zero.
+CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
+                                        bool ByteOrderSwapped) const {
+  unsigned BW = GenPoly.getBitWidth();
+  const APInt Zero = APInt::getZero(BW);
+  CRCTable Table;
+  Table[0] = Zero;
+
+  // NOTE(review): the textbook Sarwate construction seeds the big-endian walk
+  // with the top bit and the little-endian walk with 1. The seeds used here (1
+  // and 128) are kept unchanged because the checked-in test expectations
+  // appear to depend on them -- confirm which is intended.
+  if (ByteOrderSwapped) {
+    APInt CRCInit(BW, 1);
+    for (unsigned I = 1; I < 256; I <<= 1) {
+      // Test the top bit through APInt rather than through a host-integer
+      // mask: a mask built as `1 << (BW - 1)` overflows for polynomials that
+      // are 32 bits wide or more.
+      CRCInit = CRCInit.shl(1) ^ (CRCInit.isSignBitSet() ? GenPoly : Zero);
+      for (unsigned J = 0; J < I; ++J)
+        Table[I + J] = CRCInit ^ Table[J];
+    }
+    return Table;
+  }
+
+  APInt CRCInit(BW, 128);
+  for (unsigned I = 128; I; I >>= 1) {
+    CRCInit = CRCInit.lshr(1) ^ ((CRCInit & 1).isZero() ? Zero : GenPoly);
+    for (unsigned J = 0; J < 256; J += (I << 1))
+      Table[I + J] = CRCInit ^ Table[J];
+  }
+  return Table;
+}
+
+/// Checks if \p Reference is reachable from \p Needle on the use-def chain, and
+/// that there are no stray PHI nodes while digging the use-def chain. \p
+/// BOToMatch is a CRC peculiarity: exactly one of the in-loop Users of Needle
+/// needs to match this OpCode, which is XOR for CRC.
+static bool arePHIsIntertwined(
+ const PHINode *Needle, const PHINode *Reference, const Loop &L,
+ Instruction::BinaryOps BOToMatch = Instruction::BinaryOpsEnd) {
+ // Initialize the worklist with Users of the Needle.
+ SmallVector<const Instruction *> Worklist;
+ for (const User *U : Needle->users()) {
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (L.contains(UI))
+ Worklist.push_back(UI);
+ }
+
+ // BOToMatch is usually XOR for CRC.
+ if (BOToMatch != Instruction::BinaryOpsEnd) {
+ if (count_if(Worklist, [BOToMatch](const Instruction *I) {
+ return I->getOpcode() == BOToMatch;
+ }) != 1)
+ return false;
+ }
+
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+
+ // Since Needle is never pushed onto the Worklist, I must either be the
+ // Reference PHI node (in which case we're done), or a stray PHI node (in
+ // which case we abort).
+ if (isa<PHINode>(I))
+ return I == Reference;
+
+ for (const Use &U : I->operands())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ // Don't push Needle back onto the Worklist.
+ if (UI != Needle && L.contains(UI))
+ Worklist.push_back(UI);
+ }
+ return false;
+}
+
+// Classifies the SCEV \p E as a single-bit shift, expressed at the SCEV level
+// as a multiplication or an unsigned division by the constant two. Working on
+// SCEV makes the answer independent of whether the IR spells the operation as
+// mul/udiv or as shl/lshr. Yields true for a Mul by two, false for a UDiv by
+// two, and std::nullopt for anything else.
+static std::optional<bool> isBigEndianBitShift(const SCEV *E) {
+  if (match(E, m_scev_Mul(m_scev_SpecificInt(2), m_SCEV())))
+    return true;
+  if (match(E, m_scev_UDiv(m_SCEV(), m_scev_SpecificInt(2))))
+    return false;
+  return std::nullopt;
+}
+
+/// The main entry point for analyzing a loop and recognizing the CRC algorithm.
+/// Returns a PolynomialInfo on success, and either an ErrBits or a StringRef on
+/// failure.
+std::variant<PolynomialInfo, ErrBits, StringRef>
+HashRecognize::recognizeCRC() const {
+ if (!L.isInnermost())
+ return "Loop is not innermost";
+ unsigned TC = SE.getSmallConstantMaxTripCount(&L);
+ if (!TC)
+ return "Unable to find a small constant trip count";
+ BasicBlock *Latch = L.getLoopLatch();
+ BasicBlock *Exit = L.getExitBlock();
+ const PHINode *IndVar = L.getCanonicalInductionVariable();
+ if (!Exit || !Latch || !IndVar)
+ return "Loop not in canonical form";
+
+ auto [SimpleRecurrence, ConditionalRecurrence] =
+ getRecurrences(Latch, IndVar, L);
+
+ if (!ConditionalRecurrence)
+ return "Unable to find conditional recurrence";
+
+ // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv
+ // with two, or in other words, that they're single bit-shifts.
+ SmallSet<std::optional<bool>, 2> EndianStatus;
+ for (auto Info : {SimpleRecurrence, ConditionalRecurrence})
+ if (Info)
+ EndianStatus.insert(isBigEndianBitShift(SE.getSCEV(Info->BO)));
+
+ if (EndianStatus.size() != 1 || !*EndianStatus.begin())
+ return "Loop with non-unit bitshifts";
+
+ bool ByteOrderSwapped = **EndianStatus.begin();
+
+ if (SimpleRecurrence &&
+ !arePHIsIntertwined(SimpleRecurrence->Phi, ConditionalRecurrence->Phi, L,
+ Instruction::BinaryOps::Xor))
+ return "Simple recurrence doesn't use conditional recurrence with XOR";
+
+ // Make sure that the computed value is used in the exit block: this should be
+ // true even if it is only really used in an outer loop's exit block, since
+ // the loop is in LCSSA form.
+ auto *ComputedValue = cast<SelectInst>(ConditionalRecurrence->Step);
+ if (none_of(ComputedValue->users(), [Exit](User *U) {
+ auto *UI = dyn_cast<Instruction>(U);
+ return UI && UI->getParent() == Exit;
+ }))
+ return "Unable to find use of computed value in loop exit block";
+
+ assert(ConditionalRecurrence->ExtraConst &&
+ "Expected ExtraConst in conditional recurrence");
+ const APInt &GenPoly = *ConditionalRecurrence->ExtraConst;
+
+ // PhiEvolutions are pairs of PHINodes along with their incoming value from
+ // within the loop, which we term as their step.
+ SmallVector<PhiStepPair, 2> PhiEvolutions;
+ PhiEvolutions.emplace_back(ConditionalRecurrence->Phi, ComputedValue);
+ if (SimpleRecurrence)
+ PhiEvolutions.emplace_back(SimpleRecurrence->Phi, SimpleRecurrence->BO);
+
+ const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence->Start : nullptr;
+
+ ValueEvolution VE(TC, ByteOrderSwapped);
+ std::optional<KnownPhiMap> KnownPhis = VE.computeEvolutions(PhiEvolutions);
+
+ if (VE.hasError())
+ return VE.getError();
+
+ KnownBits ResultBits = KnownPhis->at(ConditionalRecurrence->Phi);
+ auto IsZero = [](const KnownBits &K) { return K.isZero(); };
+ if (!checkExtractBits(ResultBits, TC, IsZero, ByteOrderSwapped))
+ return ErrBits(ResultBits, TC, ByteOrderSwapped);
+
+ return PolynomialInfo(TC, ConditionalRecurrence->Start, GenPoly,
+ ComputedValue, ByteOrderSwapped, LHSAux);
+}
+
+/// Prints all 256 entries to \p OS, sixteen per line: entries on a line are
+/// separated by a single space and every sixteenth entry is followed by a
+/// newline.
+void CRCTable::print(raw_ostream &OS) const {
+  unsigned Printed = 0;
+  for (const APInt &Entry : *this) {
+    Entry.print(OS, false);
+    ++Printed;
+    if (Printed % 16 == 0)
+      OS << '\n';
+    else
+      OS << ' ';
+  }
+}
+
+void HashRecognize::print(raw_ostream &OS) const {
+ if (!L.isInnermost())
+ return;
+ OS << "HashRecognize: Checking a loop in '"
+ << L.getHeader()->getParent()->getName() << "' from " << L.getLocStr()
+ << "\n";
+ auto Ret = recognizeCRC();
+ if (!std::holds_alternative<PolynomialInfo>(Ret)) {
+ OS << "Did not find a hash algorithm\n";
+ if (std::holds_alternative<StringRef>(Ret))
+ OS << "Reason: " << std::get<StringRef>(Ret) << "\n";
+ if (std::holds_alternative<ErrBits>(Ret)) {
+ auto [Actual, Iter, ByteOrderSwapped] = std::get<ErrBits>(Ret);
+ OS << "Reason: Expected " << (ByteOrderSwapped ? "bottom " : "top ")
+ << Iter << " bits zero (";
+ Actual.print(OS);
+ OS << ")\n";
+ }
+ return;
+ }
+
+ auto Info = std::get<PolynomialInfo>(Ret);
+ OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian ")
+ << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count "
+ << Info.TripCount << "\n";
+ OS.indent(2) << "Initial CRC: ";
+ Info.LHS->print(OS);
+ OS << "\n";
+ OS.indent(2) << "Generating polynomial: ";
+ Info.RHS.print(OS, false);
+ OS << "\n";
+ OS.indent(2) << "Computed CRC: ";
+ Info.ComputedValue->print(OS);
+ OS << "\n";
+ if (Info.LHSAux) {
+ OS.indent(2) << "Auxiliary data: ";
+ Info.LHSAux->print(OS);
+ OS << "\n";
+ }
+ OS.indent(2) << "Computed CRC lookup table:\n";
+ genSarwateTable(Info.RHS, Info.ByteOrderSwapped).print(OS);
+}
+
+HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE)
+ : L(L), SE(SE) {}
+
+PreservedAnalyses HashRecognizePrinterPass::run(Loop &L,
+ LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &) {
+ AM.getResult<HashRecognizeAnalysis>(L, AR).print(OS);
+ return PreservedAnalyses::all();
+}
+
+HashRecognize HashRecognizeAnalysis::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR) {
+ return {L, AR.SE};
+}
+
+AnalysisKey HashRecognizeAnalysis::Key;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 56e91703cb019..f2b44773dda30 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -42,6 +42,7 @@
#include "llvm/Analysis/EphemeralValuesCache.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/HashRecognize.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/InlineAdvisor.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 94dabe290213d..24c18f7ffabcc 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -660,6 +660,7 @@ LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass())
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
LOOP_ANALYSIS("ddg", DDGAnalysis())
+LOOP_ANALYSIS("hash-recognize", HashRecognizeAnalysis())
LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
@@ -693,6 +694,7 @@ LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(errs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(errs()))
+LOOP_PASS("print<hash-recognize>", HashRecognizePrinterPass(errs()))
LOOP_PASS("print<iv-users>", IVUsersPrinterPass(errs()))
LOOP_PASS("print<loop-cache-cost>", LoopCachePrinterPass(errs()))
LOOP_PASS("print<loopnest>", LoopNestPrinterPass(errs()))
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
new file mode 100644
index 0000000000000..3c4252d51a152
--- /dev/null
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -0,0 +1,802 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<hash-recognize>' -disable-output %s 2>&1 | FileCheck %s
+
+define i16 @crc16.le.tc8(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'crc16.le.tc8'
+; CHECK-NEXT: Found little-endian CRC-16 loop with trip count 8
+; CHECK-NEXT: Initial CRC: i16 %checksum
+; CHECK-NEXT: Generating polynomial: 40961
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+; CHECK-NEXT: Auxiliary data: i8 %msg
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 40961 1 40960 2 40963 3 40962 4 40965 5 40964 6 40967 7 40966
+; CHECK-NEXT: 8 40969 9 40968 10 40971 11 40970 12 40973 13 40972 14 40975 15 40974
+; CHECK-NEXT: 16 40977 17 40976 18 40979 19 40978 20 40981 21 40980 22 40983 23 40982
+; CHECK-NEXT: 24 40985 25 40984 26 40987 27 40986 28 40989 29 40988 30 40991 31 40990
+; CHECK-NEXT: 32 40993 33 40992 34 40995 35 40994 36 40997 37 40996 38 40999 39 40998
+; CHECK-NEXT: 40 41001 41 41000 42 41003 43 41002 44 41005 45 41004 46 41007 47 41006
+; CHECK-NEXT: 48 41009 49 41008 50 41011 51 41010 52 41013 53 41012 54 41015 55 41014
+; CHECK-NEXT: 56 41017 57 41016 58 41019 59 41018 60 41021 61 41020 62 41023 63 41022
+; CHECK-NEXT: 64 41025 65 41024 66 41027 67 41026 68 41029 69 41028 70 41031 71 41030
+; CHECK-NEXT: 72 41033 73 41032 74 41035 75 41034 76 41037 77 41036 78 41039 79 41038
+; CHECK-NEXT: 80 41041 81 41040 82 41043 83 41042 84 41045 85 41044 86 41047 87 41046
+; CHECK-NEXT: 88 41049 89 41048 90 41051 91 41050 92 41053 93 41052 94 41055 95 41054
+; CHECK-NEXT: 96 41057 97 41056 98 41059 99 41058 100 41061 101 41060 102 41063 103 41062
+; CHECK-NEXT: 104 41065 105 41064 106 41067 107 41066 108 41069 109 41068 110 41071 111 41070
+; CHECK-NEXT: 112 41073 113 41072 114 41075 115 41074 116 41077 117 41076 118 41079 119 41078
+; CHECK-NEXT: 120 41081 121 41080 122 41083 123 41082 124 41085 125 41084 126 41087 127 41086
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+ %crc.trunc = trunc i16 %crc to i8
+ %xor.data.crc = xor i8 %data, %crc.trunc
+ %and.data.crc = and i8 %xor.data.crc, 1
+ %data.next = lshr i8 %data, 1
+ %check.sb = icmp eq i8 %and.data.crc, 0
+ %crc.lshr = lshr i16 %crc, 1
+ %xor = xor i16 %crc.lshr, -24575 ; -24575 as an unsigned i16 is 40961 (0xA001), the polynomial printed in the CHECK lines
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7 ; %iv takes the values 0..7, i.e. 8 iterations
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @crc16.le.tc8.udiv(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'crc16.le.tc8.udiv'
+; CHECK-NEXT: Found little-endian CRC-16 loop with trip count 8
+; CHECK-NEXT: Initial CRC: i16 %checksum
+; CHECK-NEXT: Generating polynomial: 40961
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+; CHECK-NEXT: Auxiliary data: i8 %msg
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 40961 1 40960 2 40963 3 40962 4 40965 5 40964 6 40967 7 40966
+; CHECK-NEXT: 8 40969 9 40968 10 40971 11 40970 12 40973 13 40972 14 40975 15 40974
+; CHECK-NEXT: 16 40977 17 40976 18 40979 19 40978 20 40981 21 40980 22 40983 23 40982
+; CHECK-NEXT: 24 40985 25 40984 26 40987 27 40986 28 40989 29 40988 30 40991 31 40990
+; CHECK-NEXT: 32 40993 33 40992 34 40995 35 40994 36 40997 37 40996 38 40999 39 40998
+; CHECK-NEXT: 40 41001 41 41000 42 41003 43 41002 44 41005 45 41004 46 41007 47 41006
+; CHECK-NEXT: 48 41009 49 41008 50 41011 51 41010 52 41013 53 41012 54 41015 55 41014
+; CHECK-NEXT: 56 41017 57 41016 58 41019 59 41018 60 41021 61 41020 62 41023 63 41022
+; CHECK-NEXT: 64 41025 65 41024 66 41027 67 41026 68 41029 69 41028 70 41031 71 41030
+; CHECK-NEXT: 72 41033 73 41032 74 41035 75 41034 76 41037 77 41036 78 41039 79 41038
+; CHECK-NEXT: 80 41041 81 41040 82 41043 83 41042 84 41045 85 41044 86 41047 87 41046
+; CHECK-NEXT: 88 41049 89 41048 90 41051 91 41050 92 41053 93 41052 94 41055 95 41054
+; CHECK-NEXT: 96 41057 97 41056 98 41059 99 41058 100 41061 101 41060 102 41063 103 41062
+; CHECK-NEXT: 104 41065 105 41064 106 41067 107 41066 108 41069 109 41068 110 41071 111 41070
+; CHECK-NEXT: 112 41073 113 41072 114 41075 115 41074 116 41077 117 41076 118 41079 119 41078
+; CHECK-NEXT: 120 41081 121 41080 122 41083 123 41082 124 41085 125 41084 126 41087 127 41086
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+ %crc.trunc = trunc i16 %crc to i8
+ %xor.data.crc = xor i8 %data, %crc.trunc
+ %and.data.crc = and i8 %xor.data.crc, 1
+ %data.next = udiv i8 %data, 2 ; unsigned divide by 2 takes the place of the lshr by 1 in @crc16.le.tc8
+ %check.sb = icmp eq i8 %and.data.crc, 0
+ %crc.lshr = udiv i16 %crc, 2 ; likewise a udiv by 2 instead of an lshr by 1
+ %xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @crc16.le.tc16(i16 %msg, i16 %checksum) {
+; CHECK-LABEL: 'crc16.le.tc16'
+; CHECK-NEXT: Found little-endian CRC-16 loop with trip count 16
+; CHECK-NEXT: Initial CRC: i16 %checksum
+; CHECK-NEXT: Generating polynomial: 40961
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
+; CHECK-NEXT: Auxiliary data: i16 %msg
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 40961 1 40960 2 40963 3 40962 4 40965 5 40964 6 40967 7 40966
+; CHECK-NEXT: 8 40969 9 40968 10 40971 11 40970 12 40973 13 40972 14 40975 15 40974
+; CHECK-NEXT: 16 40977 17 40976 18 40979 19 40978 20 40981 21 40980 22 40983 23 40982
+; CHECK-NEXT: 24 40985 25 40984 26 40987 27 40986 28 40989 29 40988 30 40991 31 40990
+; CHECK-NEXT: 32 40993 33 40992 34 40995 35 40994 36 40997 37 40996 38 40999 39 40998
+; CHECK-NEXT: 40 41001 41 41000 42 41003 43 41002 44 41005 45 41004 46 41007 47 41006
+; CHECK-NEXT: 48 41009 49 41008 50 41011 51 41010 52 41013 53 41012 54 41015 55 41014
+; CHECK-NEXT: 56 41017 57 41016 58 41019 59 41018 60 41021 61 41020 62 41023 63 41022
+; CHECK-NEXT: 64 41025 65 41024 66 41027 67 41026 68 41029 69 41028 70 41031 71 41030
+; CHECK-NEXT: 72 41033 73 41032 74 41035 75 41034 76 41037 77 41036 78 41039 79 41038
+; CHECK-NEXT: 80 41041 81 41040 82 41043 83 41042 84 41045 85 41044 86 41047 87 41046
+; CHECK-NEXT: 88 41049 89 41048 90 41051 91 41050 92 41053 93 41052 94 41055 95 41054
+; CHECK-NEXT: 96 41057 97 41056 98 41059 99 41058 100 41061 101 41060 102 41063 103 41062
+; CHECK-NEXT: 104 41065 105 41064 106 41067 107 41066 108 41069 109 41068 110 41071 111 41070
+; CHECK-NEXT: 112 41073 113 41072 114 41075 115 41074 116 41077 117 41076 118 41079 119 41078
+; CHECK-NEXT: 120 41081 121 41080 122 41083 123 41082 124 41085 125 41084 126 41087 127 41086
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %crc, %data ; data is already i16 here, so no trunc of %crc is needed before the xor
+ %and.crc.data = and i16 %xor.crc.data, 1
+ %data.next = lshr i16 %data, 1
+ %check.sb = icmp eq i16 %and.crc.data, 0
+ %crc.lshr = lshr i16 %crc, 1
+ %crc.xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 15 ; %iv takes the values 0..15, i.e. 16 iterations
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) {
+; CHECK-LABEL: 'crc16.be.tc8.crc.init.li'
+; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8
+; CHECK-NEXT: Initial CRC: %crc.init = xor i16 %msg.shl, %checksum
+; CHECK-NEXT: Generating polynomial: 4129
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
+; CHECK-NEXT: 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62
+; CHECK-NEXT: 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94
+; CHECK-NEXT: 96 98 100 102 104 106 108 110 112 114 116 118 120 122 124 126
+; CHECK-NEXT: 128 130 132 134 136 138 140 142 144 146 148 150 152 154 156 158
+; CHECK-NEXT: 160 162 164 166 168 170 172 174 176 178 180 182 184 186 188 190
+; CHECK-NEXT: 192 194 196 198 200 202 204 206 208 210 212 214 216 218 220 222
+; CHECK-NEXT: 224 226 228 230 232 234 236 238 240 242 244 246 248 250 252 254
+; CHECK-NEXT: 256 258 260 262 264 266 268 270 272 274 276 278 280 282 284 286
+; CHECK-NEXT: 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318
+; CHECK-NEXT: 320 322 324 326 328 330 332 334 336 338 340 342 344 346 348 350
+; CHECK-NEXT: 352 354 356 358 360 362 364 366 368 370 372 374 376 378 380 382
+; CHECK-NEXT: 384 386 388 390 392 394 396 398 400 402 404 406 408 410 412 414
+; CHECK-NEXT: 416 418 420 422 424 426 428 430 432 434 436 438 440 442 444 446
+; CHECK-NEXT: 448 450 452 454 456 458 460 462 464 466 468 470 472 474 476 478
+; CHECK-NEXT: 480 482 484 486 488 490 492 494 496 498 500 502 504 506 508 510
+;
+entry:
+ %msg.ext = zext i8 %msg to i16
+ %msg.shl = shl nuw i16 %msg.ext, 8
+ %crc.init = xor i16 %msg.shl, %checksum ; %msg is zero-extended, shifted into the top byte, and xor'ed into the initial CRC before the loop
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 4129
+ %check.sb = icmp slt i16 %crc, 0
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @crc16.be.tc8.crc.init.arg(i16 %crc.init) {
+; CHECK-LABEL: 'crc16.be.tc8.crc.init.arg'
+; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8
+; CHECK-NEXT: Initial CRC: i16 %crc.init
+; CHECK-NEXT: Generating polynomial: 4129
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
+; CHECK-NEXT: 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62
+; CHECK-NEXT: 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94
+; CHECK-NEXT: 96 98 100 102 104 106 108 110 112 114 116 118 120 122 124 126
+; CHECK-NEXT: 128 130 132 134 136 138 140 142 144 146 148 150 152 154 156 158
+; CHECK-NEXT: 160 162 164 166 168 170 172 174 176 178 180 182 184 186 188 190
+; CHECK-NEXT: 192 194 196 198 200 202 204 206 208 210 212 214 216 218 220 222
+; CHECK-NEXT: 224 226 228 230 232 234 236 238 240 242 244 246 248 250 252 254
+; CHECK-NEXT: 256 258 260 262 264 266 268 270 272 274 276 278 280 282 284 286
+; CHECK-NEXT: 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318
+; CHECK-NEXT: 320 322 324 326 328 330 332 334 336 338 340 342 344 346 348 350
+; CHECK-NEXT: 352 354 356 358 360 362 364 366 368 370 372 374 376 378 380 382
+; CHECK-NEXT: 384 386 388 390 392 394 396 398 400 402 404 406 408 410 412 414
+; CHECK-NEXT: 416 418 420 422 424 426 428 430 432 434 436 438 440 442 444 446
+; CHECK-NEXT: 448 450 452 454 456 458 460 462 464 466 468 470 472 474 476 478
+; CHECK-NEXT: 480 482 484 486 488 490 492 494 496 498 500 502 504 506 508 510
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ] ; the initial CRC is the function argument itself
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 4129
+ %check.sb = icmp slt i16 %crc, 0 ; signed "< 0" is true exactly when the top bit of %crc is set
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @crc16.be.tc8.crc.init.arg.flipped.sb.check(i16 %crc.init) {
+; CHECK-LABEL: 'crc16.be.tc8.crc.init.arg.flipped.sb.check'
+; CHECK-NEXT: Found big-endian CRC-16 loop with trip count 8
+; CHECK-NEXT: Initial CRC: i16 %crc.init
+; CHECK-NEXT: Generating polynomial: 4129
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
+; CHECK-NEXT: 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62
+; CHECK-NEXT: 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94
+; CHECK-NEXT: 96 98 100 102 104 106 108 110 112 114 116 118 120 122 124 126
+; CHECK-NEXT: 128 130 132 134 136 138 140 142 144 146 148 150 152 154 156 158
+; CHECK-NEXT: 160 162 164 166 168 170 172 174 176 178 180 182 184 186 188 190
+; CHECK-NEXT: 192 194 196 198 200 202 204 206 208 210 212 214 216 218 220 222
+; CHECK-NEXT: 224 226 228 230 232 234 236 238 240 242 244 246 248 250 252 254
+; CHECK-NEXT: 256 258 260 262 264 266 268 270 272 274 276 278 280 282 284 286
+; CHECK-NEXT: 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318
+; CHECK-NEXT: 320 322 324 326 328 330 332 334 336 338 340 342 344 346 348 350
+; CHECK-NEXT: 352 354 356 358 360 362 364 366 368 370 372 374 376 378 380 382
+; CHECK-NEXT: 384 386 388 390 392 394 396 398 400 402 404 406 408 410 412 414
+; CHECK-NEXT: 416 418 420 422 424 426 428 430 432 434 436 438 440 442 444 446
+; CHECK-NEXT: 448 450 452 454 456 458 460 462 464 466 468 470 472 474 476 478
+; CHECK-NEXT: 480 482 484 486 488 490 492 494 496 498 500 502 504 506 508 510
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 4129
+ %check.sb = icmp sge i16 %crc, 0 ; signed ">= 0" is true exactly when the top bit of %crc is clear
+ %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor ; arms are swapped to match the flipped predicate
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i8 @crc8.be.tc8.ptr.nested.loop(ptr %msg, i32 %loop.limit) {
+; CHECK-LABEL: 'crc8.be.tc8.ptr.nested.loop'
+; CHECK-NEXT: Found big-endian CRC-8 loop with trip count 8
+; CHECK-NEXT: Initial CRC: %crc.init = xor i8 %msg.load, %crc.outer
+; CHECK-NEXT: Generating polynomial: 29
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i8 %crc.xor, i8 %crc.shl
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
+; CHECK-NEXT: 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62
+; CHECK-NEXT: 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94
+; CHECK-NEXT: 96 98 100 102 104 106 108 110 112 114 116 118 120 122 124 126
+; CHECK-NEXT: 128 130 132 134 136 138 140 142 144 146 148 150 152 154 156 158
+; CHECK-NEXT: 160 162 164 166 168 170 172 174 176 178 180 182 184 186 188 190
+; CHECK-NEXT: 192 194 196 198 200 202 204 206 208 210 212 214 216 218 220 222
+; CHECK-NEXT: 224 226 228 230 232 234 236 238 240 242 244 246 248 250 252 254
+; CHECK-NEXT: 29 31 25 27 21 23 17 19 13 15 9 11 5 7 1 3
+; CHECK-NEXT: 61 63 57 59 53 55 49 51 45 47 41 43 37 39 33 35
+; CHECK-NEXT: 93 95 89 91 85 87 81 83 77 79 73 75 69 71 65 67
+; CHECK-NEXT: 125 127 121 123 117 119 113 115 109 111 105 107 101 103 97 99
+; CHECK-NEXT: 157 159 153 155 149 151 145 147 141 143 137 139 133 135 129 131
+; CHECK-NEXT: 189 191 185 187 181 183 177 179 173 175 169 171 165 167 161 163
+; CHECK-NEXT: 221 223 217 219 213 215 209 211 205 207 201 203 197 199 193 195
+; CHECK-NEXT: 253 255 249 251 245 247 241 243 237 239 233 235 229 231 225 227
+;
+entry:
+ br label %outer.loop
+
+outer.loop: ; preds = %inner.exit, %entry
+ %crc.outer = phi i8 [ 0, %entry ], [ %crc.next, %inner.exit ]
+ %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
+ %outer.exit.cond = icmp ult i32 %outer.iv, %loop.limit ; the outer bound is a function argument; only the inner loop has the constant bound
+ br i1 %outer.exit.cond, label %ph, label %exit
+
+ph: ; preds = %outer.loop
+ %outer.iv.ext = sext i32 %outer.iv to i64
+ %msg.outer.iv = getelementptr inbounds i8, ptr %msg, i64 %outer.iv.ext
+ %msg.load = load i8, ptr %msg.outer.iv, align 1
+ %crc.init = xor i8 %msg.load, %crc.outer ; each outer iteration xors one loaded byte into the running CRC before the inner loop
+ br label %inner.loop
+
+inner.loop: ; preds = %inner.loop, %ph
+ %inner.iv = phi i32 [ 0, %ph ], [ %inner.iv.next, %inner.loop ]
+ %crc = phi i8 [ %crc.init, %ph ], [ %crc.next, %inner.loop ]
+ %crc.shl = shl i8 %crc, 1
+ %crc.xor = xor i8 %crc.shl, 29
+ %check.sb = icmp slt i8 %crc, 0
+ %crc.next = select i1 %check.sb, i8 %crc.xor, i8 %crc.shl
+ %inner.iv.next = add nuw nsw i32 %inner.iv, 1
+ %exit.cond = icmp samesign ult i32 %inner.iv, 7
+ br i1 %exit.cond, label %inner.loop, label %inner.exit
+
+inner.exit: ; preds = %inner.loop
+ %outer.iv.next = add i32 %outer.iv, 1
+ br label %outer.loop
+
+exit: ; preds = %outer.loop
+ ret i8 %crc.outer
+}
+
+define i32 @crc16.le.tc8.data32(i32 %checksum, i32 %msg) {
+; CHECK-LABEL: 'crc16.le.tc8.data32'
+; CHECK-NEXT: Found little-endian CRC-32 loop with trip count 8
+; CHECK-NEXT: Initial CRC: i32 %checksum
+; CHECK-NEXT: Generating polynomial: 33800
+; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+; CHECK-NEXT: Auxiliary data: i32 %msg
+; CHECK-NEXT: Computed CRC lookup table:
+; CHECK-NEXT: 0 33800 1 33801 2 33802 3 33803 4 33804 5 33805 6 33806 7 33807
+; CHECK-NEXT: 8 33792 9 33793 10 33794 11 33795 12 33796 13 33797 14 33798 15 33799
+; CHECK-NEXT: 16 33816 17 33817 18 33818 19 33819 20 33820 21 33821 22 33822 23 33823
+; CHECK-NEXT: 24 33808 25 33809 26 33810 27 33811 28 33812 29 33813 30 33814 31 33815
+; CHECK-NEXT: 32 33832 33 33833 34 33834 35 33835 36 33836 37 33837 38 33838 39 33839
+; CHECK-NEXT: 40 33824 41 33825 42 33826 43 33827 44 33828 45 33829 46 33830 47 33831
+; CHECK-NEXT: 48 33848 49 33849 50 33850 51 33851 52 33852 53 33853 54 33854 55 33855
+; CHECK-NEXT: 56 33840 57 33841 58 33842 59 33843 60 33844 61 33845 62 33846 63 33847
+; CHECK-NEXT: 64 33864 65 33865 66 33866 67 33867 68 33868 69 33869 70 33870 71 33871
+; CHECK-NEXT: 72 33856 73 33857 74 33858 75 33859 76 33860 77 33861 78 33862 79 33863
+; CHECK-NEXT: 80 33880 81 33881 82 33882 83 33883 84 33884 85 33885 86 33886 87 33887
+; CHECK-NEXT: 88 33872 89 33873 90 33874 91 33875 92 33876 93 33877 94 33878 95 33879
+; CHECK-NEXT: 96 33896 97 33897 98 33898 99 33899 100 33900 101 33901 102 33902 103 33903
+; CHECK-NEXT: 104 33888 105 33889 106 33890 107 33891 108 33892 109 33893 110 33894 111 33895
+; CHECK-NEXT: 112 33912 113 33913 114 33914 115 33915 116 33916 117 33917 118 33918 119 33919
+; CHECK-NEXT: 120 33904 121 33905 122 33906 123 33907 124 33908 125 33909 126 33910 127 33911
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] ; the CRC is carried in an i32 here, unlike the i16 tests above
+ %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %xor.crc.data = xor i32 %crc, %data
+ %sb.crc.data = and i32 %xor.crc.data, 1
+ %check.sb = icmp eq i32 %sb.crc.data, 0
+ %crc.lshr = lshr i32 %crc, 1
+ %crc.xor = xor i32 %crc.lshr, 33800 ; 33800 = 0x8408, the generating polynomial printed in the CHECK lines
+ %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %data.next = lshr i32 %data, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i32 %crc.next
+}
+
+; Negative tests
+
+define i16 @not.crc16.non.const.tc(i16 %crc.init, i32 %loop.limit) {
+; CHECK-LABEL: 'not.crc16.non.const.tc'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Unable to find a small constant trip count
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 4129
+ %check.sb = icmp sge i16 %crc, 0
+ %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, %loop.limit ; the bound is the argument %loop.limit, so the trip count is not a constant
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @not.crc16.no.conditional.recurrence(i16 %crc.init) {
+; CHECK-LABEL: 'not.crc16.no.conditional.recurrence'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Unable to find conditional recurrence
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %shl = shl i16 %crc, 1
+ %crc.next = xor i16 %shl, 258 ; the xor is applied on every iteration; there is no select in this loop
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+
+define i16 @not.crc16.nonunit.shifts(i16 %checksum, i8 %msg) {
+; CHECK-LABEL: 'not.crc16.nonunit.shifts'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Loop with non-unit bitshifts
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %crc.lshr = lshr i16 %crc, 8 ; shift by 8, the only non-unit shift in the loop (NOTE(review): presumably what the CHECK reason refers to; confirm)
+ %data.ext = zext i8 %data to i16
+ %xor.crc.data = xor i16 %crc.lshr, %data.ext
+ %check.sb = icmp samesign ult i16 %xor.crc.data, 128
+ %crc.and = and i16 %crc, 32767 ; %crc is masked rather than shifted by one on its way to %crc.next
+ %crc.xor = xor i16 %crc.and, 258
+ %crc.next = select i1 %check.sb, i16 %crc.and, i16 %crc.xor
+ %data.next = shl i8 %data, 1
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @not.crc16.result.unused(i16 %crc.init) {
+; CHECK-LABEL: 'not.crc16.result.unused'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Unable to find use of computed value in loop exit block
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 4129
+ %check.sb = icmp slt i16 %crc, 0
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc ; returns the phi %crc, not %crc.next, so the exit block has no use of the computed value
+}
+
+define i16 @not.crc16.wrong.sb.check.const(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc16.wrong.sb.check.const'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %crc.lshr = lshr i16 %crc, 8
+ %data.ext = zext i8 %data to i16
+ %xor.crc.data = xor i16 %crc.lshr, %data.ext
+ %check.sb = icmp samesign ult i16 %xor.crc.data, 128 ; compares (%crc >> 8) ^ zext(%data) against the constant 128
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 258
+ %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
+ %data.next = shl i8 %data, 1
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @not.crc16.wrong.sb.check.pred(i16 %crc.init) {
+; CHECK-LABEL: 'not.crc16.wrong.sb.check.pred'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Bad RHS of significant-bit-check
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %crc.shl = shl i16 %crc, 1
+ %crc.xor = xor i16 %crc.shl, 4129
+ %check.sb = icmp sgt i16 %crc, 0 ; sgt rather than the sge of @crc16.be.tc8.crc.init.arg.flipped.sb.check: also false when %crc is 0
+ %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
+ %iv.next = add nuw nsw i32 %iv, 1
+ %exit.cond = icmp samesign ult i32 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @not.crc16.excess.tc(i16 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc16.excess.tc'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Loop iterations exceed bitwidth of result
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %crc, %data
+ %and.crc.data = and i16 %xor.crc.data, 1
+ %data.next = lshr i16 %data, 1
+ %check.sb = icmp eq i16 %and.crc.data, 0
+ %crc.lshr = lshr i16 %crc, 1
+ %crc.xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 20 ; %iv takes the values 0..20, i.e. 21 iterations, more than the 16 bits of the i16 CRC
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i32 @not.crc16.unknown.icmp.rhs(i32 %checksum, i32 %msg, i32 %unknown) {
+; CHECK-LABEL: 'not.crc16.unknown.icmp.rhs'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Bad LHS of significant-bit-check
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %xor.crc.data = xor i32 %crc, %data
+ %sb.crc.data = or i32 %xor.crc.data, 1 ; an or with 1 here, where @crc16.le.tc8.data32 uses an and with 1
+ %check.sb = icmp eq i32 %sb.crc.data, %unknown ; compared against the argument %unknown instead of a constant
+ %crc.lshr = lshr i32 %crc, 1
+ %crc.xor = xor i32 %crc.lshr, 33800
+ %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %data.next = lshr i32 %data, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i32 %crc.next
+}
+
+define i32 @not.crc16.unknown.icmp.lhs(i32 %checksum, i32 %msg, i32 %unknown) {
+; CHECK-LABEL: 'not.crc16.unknown.icmp.lhs'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Bad LHS of significant-bit-check
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %xor.crc.data = xor i32 %crc, %data
+ %sb.crc.data = or i32 %xor.crc.data, %unknown ; the argument %unknown is or'ed into the value that gets checked
+ %check.sb = icmp eq i32 %sb.crc.data, 0
+ %crc.lshr = lshr i32 %crc, 1
+ %crc.xor = xor i32 %crc.lshr, 33800
+ %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %data.next = lshr i32 %data, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i32 %crc.next
+}
+
+
+define i16 @not.crc16.stray.or(i16 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc16.stray.or'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Bad LHS of significant-bit-check
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %crc, %data
+ %and.crc.data = and i16 %xor.crc.data, 1
+ %crc.corrupt = or i16 %and.crc.data, 1 ; the or with 1 forces bit 0 on, so this value is never zero
+ %data.next = lshr i16 %data, 1
+ %check.sb = icmp ne i16 %crc.corrupt, 0 ; always true, given the or above
+ %crc.lshr = lshr i16 %crc, 1
+ %crc.xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 15
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @not.crc16.inverse.sb.check(i16 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc16.inverse.sb.check'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Expected top 16 bits zero (1100000000000001)
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+ %xor.crc.data = xor i16 %crc, %data
+ %and.crc.data = and i16 %xor.crc.data, 1
+ %data.next = lshr i16 %data, 1
+ %check.sb = icmp ne i16 %and.crc.data, 0 ; ne where @crc16.le.tc16 uses eq, while the select arms below are left unswapped
+ %crc.lshr = lshr i16 %crc, 1
+ %crc.xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 15
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @crc1.tc8.sb.check.endian.mismatch(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'crc1.tc8.sb.check.endian.mismatch'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Bad RHS of significant-bit-check
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+ %crc.trunc = trunc i16 %crc to i8
+ %xor.data.crc = xor i8 %data, %crc.trunc
+ %and.data.crc = and i8 %xor.data.crc, 1 ; the check below tests bit 0 ...
+ %data.next = mul i8 %data, 2
+ %check.sb = icmp eq i8 %and.data.crc, 0
+ %crc.lshr = mul i16 %crc, 2 ; ... but despite the name %crc.lshr, the multiply by 2 moves bits upwards
+ %xor = xor i16 %crc.lshr, 0 ; the xor constant is 0
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i16 @not.crc16.init.arg.inverted.select(i16 %crc.init) {
+; CHECK-LABEL: 'not.crc16.init.arg.inverted.select'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Expected top 8 bits zero (11000000????????)
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+ %sb.crc = and i16 %crc, 1
+ %check.sb = icmp eq i16 %sb.crc, 0
+ %crc.lshr = lshr i16 %crc, 1
+ %crc.xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.lshr ; arms swapped relative to @crc16.le.tc8: the xor is taken when bit 0 is clear
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i16 %crc.next
+}
+
+define i32 @not.crc16.dead.msg.bad.use(i32 %checksum, i32 %msg) {
+; CHECK-LABEL: 'not.crc16.dead.msg.bad.use'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %data.or = or i32 %data, -1 ; an or with -1 always yields -1, so the value of %data no longer affects the check
+ %xor.crc.data = xor i32 %crc, %data.or
+ %sb.crc.data = and i32 %xor.crc.data, 1
+ %check.sb = icmp eq i32 %sb.crc.data, 0
+ %crc.lshr = lshr i32 %crc, 1
+ %crc.xor = xor i32 %crc.lshr, 33800
+ %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %data.next = lshr i32 %data, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i32 %crc.next
+}
+
+define i16 @not.crc16.dead.msg.no.use(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc16.dead.msg.no.use'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+ %crc.trunc = trunc i16 %crc to i8
+ %and.crc = and i8 %crc.trunc, 1 ; the checked bit comes from %crc alone; %data is not xor'ed in
+ %data.next = lshr i8 %data, 1 ; inside the loop, %data only feeds its own recurrence
+ %check.sb = icmp eq i8 %and.crc, 0
+ %crc.lshr = lshr i16 %crc, 1
+ %xor = xor i16 %crc.lshr, -24575
+ %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ %data.zext = zext i8 %data.next to i16 ; %data.next is used only here, after the loop
+ %ret = xor i16 %crc.next, %data.zext
+ ret i16 %ret
+}
+
+define i32 @not.crc16.dead.msg.wrong.op(i32 %checksum, i32 %msg) {
+; CHECK-LABEL: 'not.crc16.dead.msg.wrong.op'
+; CHECK-NEXT: Did not find a hash algorithm
+; CHECK-NEXT: Reason: Simple recurrence doesn't use conditional recurrence with XOR
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+ %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+ %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+ %or.crc.data = or i32 %crc, %data ; crc and data are combined with or, where @crc16.le.tc8.data32 uses xor
+ %sb.crc.data = and i32 %or.crc.data, 1
+ %check.sb = icmp eq i32 %sb.crc.data, 0
+ %crc.lshr = lshr i32 %crc, 1
+ %crc.xor = xor i32 %crc.lshr, 33800
+ %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+ %iv.next = add nuw nsw i8 %iv, 1
+ %data.next = lshr i32 %data, 1
+ %exit.cond = icmp samesign ult i8 %iv, 7
+ br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+ ret i32 %crc.next
+}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 7a60dce42802d..a5095eeed7ac9 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -55,6 +55,7 @@ static_library("Analysis") {
"FunctionPropertiesAnalysis.cpp",
"GlobalsModRef.cpp",
"GuardUtils.cpp",
+ "HashRecognize.cpp",
"HeatUtils.cpp",
"IRSimilarityIdentifier.cpp",
"IVDescriptors.cpp",
diff --git a/llvm/utils/update_analyze_test_checks.py b/llvm/utils/update_analyze_test_checks.py
index 6c194f0923d17..3f14452767f9e 100755
--- a/llvm/utils/update_analyze_test_checks.py
+++ b/llvm/utils/update_analyze_test_checks.py
@@ -109,10 +109,13 @@ def update_test(opt_basename: str, ti: common.TestInfo):
prefixes,
)
elif (
- re.search(r"(LV|LDist): Checking a loop in ", raw_tool_outputs) is not None
+ re.search(
+ r"(LV|LDist|HashRecognize): Checking a loop in ", raw_tool_outputs
+ )
+ is not None
):
for raw_tool_output in re.split(
- r"(LV|LDist): Checking a loop in ", raw_tool_outputs
+ r"(LV|LDist|HashRecognize): Checking a loop in ", raw_tool_outputs
):
builder.process_run_line(
common.LOOP_PASS_DEBUG_RE,
>From 16419b637676c1e4f816bebdcde3e512e664ca0e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 20 May 2025 16:31:15 +0100
Subject: [PATCH 2/5] [HashRecognize] Address review
---
llvm/include/llvm/Analysis/HashRecognize.h | 21 ++++++++++++
llvm/lib/Analysis/HashRecognize.cpp | 34 ++++++++-----------
.../HashRecognize/cyclic-redundancy-check.ll | 4 +--
3 files changed, 37 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/Analysis/HashRecognize.h b/llvm/include/llvm/Analysis/HashRecognize.h
index cc353836118a3..872404de5bf41 100644
--- a/llvm/include/llvm/Analysis/HashRecognize.h
+++ b/llvm/include/llvm/Analysis/HashRecognize.h
@@ -37,12 +37,33 @@ struct CRCTable : public std::array<APInt, 256> {
/// The structure that is returned when a polynomial algorithm was recognized by
/// the analysis. Currently, only the CRC algorithm is recognized.
struct PolynomialInfo {
+ // The small constant trip-count of the analyzed loop.
unsigned TripCount;
+
+ // The LHS in a polynomial operation, or the initial variable of the
+ // computation, since all polynomial operations must have a constant RHS,
+ // which is the generating polynomial. It is the LHS of the polynomial
+ // division in the case of CRC. Since polynomial division is an XOR in
+ // GF(2^m), this variable must be XOR'ed with RHS in a loop to yield the
+ // ComputedValue.
const Value *LHS;
+
+ // The generating polynomial, or the RHS of the polynomial division in the
+ // case of CRC.
APInt RHS;
+
+ // The final computed value. This is a remainder of a polynomial division in
+ // the case of CRC, which must be zero.
const Value *ComputedValue;
+
+ // Set to true in the case of big-endian.
bool ByteOrderSwapped;
+
+ // An optional auxiliary checksum that augments the LHS. In the case of CRC,
+ // it is XOR'ed with the LHS, so that the computation's final remainder is
+ // zero.
const Value *LHSAux;
+
PolynomialInfo(unsigned TripCount, const Value *LHS, const APInt &RHS,
const Value *ComputedValue, bool ByteOrderSwapped,
const Value *LHSAux = nullptr);
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index ec624e5e7d7a5..3ca9e0d4c5cc2 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -35,7 +35,7 @@
// following: in such fields, polynomial addition and subtraction are identical
// and equivalent to XOR, polynomial multiplication is an AND, and polynomial
// division is identity: the XOR and AND operations in unoptimized
-// implmentations are performed bit-wise, and can be optimized to be performed
+// implementations are performed bit-wise, and can be optimized to be performed
// chunk-wise, by interleaving copies of the generating polynomial, and storing
// the pre-computed values in a table.
//
@@ -87,11 +87,10 @@ using PhiStepPair = std::pair<const PHINode *, const Instruction *>;
/// given trip count, and predication is specialized for a significant-bit
/// check.
class ValueEvolution {
- unsigned TripCount;
- bool ByteOrderSwapped;
+ const unsigned TripCount;
+ const bool ByteOrderSwapped;
APInt GenPoly;
StringRef ErrStr;
- unsigned AtIteration;
KnownBits computeBinOp(const BinaryOperator *I, const KnownPhiMap &KnownPhis);
KnownBits computeInstr(const Instruction *I, const KnownPhiMap &KnownPhis);
@@ -182,8 +181,8 @@ KnownBits ValueEvolution::computeInstr(const Instruction *I,
if (const PHINode *P = dyn_cast<PHINode>(I))
return KnownPhis.lookup_or(P, BitWidth);
- // Compute the KnownBits for a Select(Cmp()), forcing it to take the take the
- // branch that is predicated on the (least|most)-significant-bit check.
+ // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch
+ // that is predicated on the (least|most)-significant-bit check.
CmpPredicate Pred;
Value *L, *R, *TV, *FV;
if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Value(TV),
@@ -239,8 +238,6 @@ KnownBits ValueEvolution::computeInstr(const Instruction *I,
/// Compute the KnownBits of Value \p V.
KnownBits ValueEvolution::compute(const Value *V,
const KnownPhiMap &KnownPhis) {
- unsigned BitWidth = V->getType()->getScalarSizeInBits();
-
const APInt *C;
if (match(V, m_APInt(C)))
return KnownBits::makeConstant(*C);
@@ -249,6 +246,7 @@ KnownBits ValueEvolution::compute(const Value *V,
return computeInstr(I, KnownPhis);
ErrStr = "Unknown Value";
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
return {BitWidth};
}
@@ -258,7 +256,6 @@ std::optional<KnownPhiMap>
ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) {
KnownPhiMap KnownPhis;
for (unsigned I = 0; I < TripCount; ++I) {
- AtIteration = I;
for (auto [Phi, Step] : PhiEvolutions) {
KnownBits KnownAtIter = computeInstr(Step, KnownPhis);
if (KnownAtIter.getBitWidth() < I + 1) {
@@ -320,8 +317,8 @@ digRecurrence(Instruction *V, const PHINode *P, const Loop &L,
/// \p ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when
/// digging the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is
/// matched, and \p ExtraConst is a constant operand of that BinOp. This
-/// peculiary exists, because in a CRC algorithm, the \p BOWithConstOpToMatch is
-/// an XOR, and the \p ExtraConst ends up being the generating polynomial.
+/// peculiarity exists, because in a CRC algorithm, the \p BOWithConstOpToMatch
+/// is an XOR, and the \p ExtraConst ends up being the generating polynomial.
static bool matchConditionalRecurrence(
const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step,
const Loop &L, const APInt *&ExtraConst,
@@ -434,9 +431,9 @@ PolynomialInfo::PolynomialInfo(unsigned TripCount, const Value *LHS,
: TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue),
ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {}
-/// In big-endian case, checks that bottom N bits against CheckFn, and that the
-/// rest are unknown. In little-endian case, checks that the top N bits against
-/// CheckFn, and that the rest are unknown.
+/// In the big-endian case, checks the bottom N bits against CheckFn, and that
+/// the rest are unknown. In the little-endian case, checks the top N bits
+/// against CheckFn, and that the rest are unknown.
static bool checkExtractBits(const KnownBits &Known, unsigned N,
function_ref<bool(const KnownBits &)> CheckFn,
bool ByteOrderSwapped) {
@@ -459,7 +456,6 @@ static bool checkExtractBits(const KnownBits &Known, unsigned N,
CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
bool ByteOrderSwapped) const {
unsigned BW = GenPoly.getBitWidth();
- unsigned MSB = 1 << (BW - 1);
CRCTable Table;
Table[0] = APInt::getZero(BW);
@@ -467,7 +463,7 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
APInt CRCInit(BW, 1);
for (unsigned I = 1; I < 256; I <<= 1) {
CRCInit = CRCInit.shl(1) ^
- ((CRCInit & MSB).isZero() ? APInt::getZero(BW) : GenPoly);
+ (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(BW));
for (unsigned J = 0; J < I; ++J)
Table[I + J] = CRCInit ^ Table[J];
}
@@ -476,8 +472,7 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
APInt CRCInit(BW, 128);
for (unsigned I = 128; I; I >>= 1) {
- CRCInit = CRCInit.lshr(1) ^
- ((CRCInit & 1).isZero() ? APInt::getZero(BW) : GenPoly);
+ CRCInit = CRCInit.lshr(1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(BW));
for (unsigned J = 0; J < 256; J += (I << 1))
Table[I + J] = CRCInit ^ Table[J];
}
@@ -597,8 +592,6 @@ HashRecognize::recognizeCRC() const {
if (SimpleRecurrence)
PhiEvolutions.emplace_back(SimpleRecurrence->Phi, SimpleRecurrence->BO);
- const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence->Start : nullptr;
-
ValueEvolution VE(TC, ByteOrderSwapped);
std::optional<KnownPhiMap> KnownPhis = VE.computeEvolutions(PhiEvolutions);
@@ -610,6 +603,7 @@ HashRecognize::recognizeCRC() const {
if (!checkExtractBits(ResultBits, TC, IsZero, ByteOrderSwapped))
return ErrBits(ResultBits, TC, ByteOrderSwapped);
+ const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence->Start : nullptr;
return PolynomialInfo(TC, ConditionalRecurrence->Start, GenPoly,
ComputedValue, ByteOrderSwapped, LHSAux);
}
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 3c4252d51a152..7b4a2a12ba551 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -332,8 +332,8 @@ exit: ; preds = %outer.loop
ret i8 %crc.outer
}
-define i32 @crc16.le.tc8.data32(i32 %checksum, i32 %msg) {
-; CHECK-LABEL: 'crc16.le.tc8.data32'
+define i32 @crc32.le.tc8.data32(i32 %checksum, i32 %msg) {
+; CHECK-LABEL: 'crc32.le.tc8.data32'
; CHECK-NEXT: Found little-endian CRC-32 loop with trip count 8
; CHECK-NEXT: Initial CRC: i32 %checksum
; CHECK-NEXT: Generating polynomial: 33800
>From 9f5864ca14b781e325e55f45d65c934819f60409 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 21 May 2025 12:24:28 +0100
Subject: [PATCH 3/5] [HashRecognize] NFC simplification from review
---
llvm/lib/Analysis/HashRecognize.cpp | 50 ++++++++++++-----------------
1 file changed, 21 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index 3ca9e0d4c5cc2..a365a29f3b586 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -59,7 +59,6 @@
#include "llvm/Analysis/HashRecognize.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -101,8 +100,8 @@ class ValueEvolution {
// In case ValueEvolution encounters an error, these are meant to be used for
// a precise error message.
- bool hasError() const;
- StringRef getError() const;
+ bool hasError() const { return !ErrStr.empty(); }
+ StringRef getError() const { return ErrStr; }
// Given a list of PHI nodes along with their incoming value from within the
// loop, and the trip-count of the loop, computeEvolutions
@@ -114,14 +113,9 @@ class ValueEvolution {
ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
: TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {}
-bool ValueEvolution::hasError() const { return !ErrStr.empty(); }
-StringRef ValueEvolution::getError() const { return ErrStr; }
-
/// Compute the KnownBits of BinaryOperator \p I.
KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I,
const KnownPhiMap &KnownPhis) {
- unsigned BitWidth = I->getType()->getScalarSizeInBits();
-
KnownBits KnownL(compute(I->getOperand(0), KnownPhis));
KnownBits KnownR(compute(I->getOperand(1), KnownPhis));
@@ -167,6 +161,7 @@ KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I,
return KnownBits::srem(KnownL, KnownR);
default:
ErrStr = "Unknown BinaryOperator";
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
return {BitWidth};
}
}
@@ -437,15 +432,14 @@ PolynomialInfo::PolynomialInfo(unsigned TripCount, const Value *LHS,
static bool checkExtractBits(const KnownBits &Known, unsigned N,
function_ref<bool(const KnownBits &)> CheckFn,
bool ByteOrderSwapped) {
- unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N;
- unsigned SwappedBitPos = ByteOrderSwapped ? N : 0;
-
// Check that the entire thing is a constant.
if (N == Known.getBitWidth())
return CheckFn(Known.extractBits(N, 0));
// Check that the {top, bottom} N bits are not unknown and that the {bottom,
// top} N bits are known.
+ unsigned BitPos = ByteOrderSwapped ? 0 : Known.getBitWidth() - N;
+ unsigned SwappedBitPos = ByteOrderSwapped ? N : 0;
return CheckFn(Known.extractBits(N, BitPos)) &&
Known.extractBits(Known.getBitWidth() - N, SwappedBitPos).isUnknown();
}
@@ -545,7 +539,7 @@ HashRecognize::recognizeCRC() const {
BasicBlock *Latch = L.getLoopLatch();
BasicBlock *Exit = L.getExitBlock();
const PHINode *IndVar = L.getCanonicalInductionVariable();
- if (!Exit || !Latch || !IndVar)
+ if (!Latch || !Exit || !IndVar)
return "Loop not in canonical form";
auto [SimpleRecurrence, ConditionalRecurrence] =
@@ -556,20 +550,18 @@ HashRecognize::recognizeCRC() const {
// Make sure that all recurrences are either all SCEVMul with two or SCEVDiv
// with two, or in other words, that they're single bit-shifts.
- SmallSet<std::optional<bool>, 2> EndianStatus;
- for (auto Info : {SimpleRecurrence, ConditionalRecurrence})
- if (Info)
- EndianStatus.insert(isBigEndianBitShift(SE.getSCEV(Info->BO)));
-
- if (EndianStatus.size() != 1 || !*EndianStatus.begin())
+ std::optional<bool> ByteOrderSwapped =
+ isBigEndianBitShift(SE.getSCEV(ConditionalRecurrence->BO));
+ if (!ByteOrderSwapped)
return "Loop with non-unit bitshifts";
-
- bool ByteOrderSwapped = **EndianStatus.begin();
-
- if (SimpleRecurrence &&
- !arePHIsIntertwined(SimpleRecurrence->Phi, ConditionalRecurrence->Phi, L,
- Instruction::BinaryOps::Xor))
- return "Simple recurrence doesn't use conditional recurrence with XOR";
+ if (SimpleRecurrence) {
+ if (isBigEndianBitShift(SE.getSCEV(SimpleRecurrence->BO)) !=
+ ByteOrderSwapped)
+ return "Loop with non-unit bitshifts";
+ if (!arePHIsIntertwined(SimpleRecurrence->Phi, ConditionalRecurrence->Phi,
+ L, Instruction::BinaryOps::Xor))
+ return "Simple recurrence doesn't use conditional recurrence with XOR";
+ }
// Make sure that the computed value is used in the exit block: this should be
// true even if it is only really used in an outer loop's exit block, since
@@ -592,7 +584,7 @@ HashRecognize::recognizeCRC() const {
if (SimpleRecurrence)
PhiEvolutions.emplace_back(SimpleRecurrence->Phi, SimpleRecurrence->BO);
- ValueEvolution VE(TC, ByteOrderSwapped);
+ ValueEvolution VE(TC, *ByteOrderSwapped);
std::optional<KnownPhiMap> KnownPhis = VE.computeEvolutions(PhiEvolutions);
if (VE.hasError())
@@ -600,12 +592,12 @@ HashRecognize::recognizeCRC() const {
KnownBits ResultBits = KnownPhis->at(ConditionalRecurrence->Phi);
auto IsZero = [](const KnownBits &K) { return K.isZero(); };
- if (!checkExtractBits(ResultBits, TC, IsZero, ByteOrderSwapped))
- return ErrBits(ResultBits, TC, ByteOrderSwapped);
+ if (!checkExtractBits(ResultBits, TC, IsZero, *ByteOrderSwapped))
+ return ErrBits(ResultBits, TC, *ByteOrderSwapped);
const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence->Start : nullptr;
return PolynomialInfo(TC, ConditionalRecurrence->Start, GenPoly,
- ComputedValue, ByteOrderSwapped, LHSAux);
+ ComputedValue, *ByteOrderSwapped, LHSAux);
}
void CRCTable::print(raw_ostream &OS) const {
>From 0122c77bae84c88981e61fe09fa30e4fceacaa18 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 21 May 2025 12:26:15 +0100
Subject: [PATCH 4/5] [HashRecognize] Fix BE tables
---
llvm/lib/Analysis/HashRecognize.cpp | 3 +-
.../HashRecognize/cyclic-redundancy-check.ll | 128 +++++++++---------
2 files changed, 65 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index a365a29f3b586..665dec8d751b0 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -452,9 +452,9 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
unsigned BW = GenPoly.getBitWidth();
CRCTable Table;
Table[0] = APInt::getZero(BW);
+ APInt CRCInit(BW, 1);
if (ByteOrderSwapped) {
- APInt CRCInit(BW, 1);
for (unsigned I = 1; I < 256; I <<= 1) {
CRCInit = CRCInit.shl(1) ^
(CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(BW));
@@ -464,7 +464,6 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
return Table;
}
- APInt CRCInit(BW, 128);
for (unsigned I = 128; I; I >>= 1) {
CRCInit = CRCInit.lshr(1) ^ (CRCInit[0] ? GenPoly : APInt::getZero(BW));
for (unsigned J = 0; J < 256; J += (I << 1))
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 7b4a2a12ba551..c838633f7b58d 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -9,22 +9,22 @@ define i16 @crc16.le.tc8(i8 %msg, i16 %checksum) {
; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
; CHECK-NEXT: Auxiliary data: i8 %msg
; CHECK-NEXT: Computed CRC lookup table:
-; CHECK-NEXT: 0 40961 1 40960 2 40963 3 40962 4 40965 5 40964 6 40967 7 40966
-; CHECK-NEXT: 8 40969 9 40968 10 40971 11 40970 12 40973 13 40972 14 40975 15 40974
-; CHECK-NEXT: 16 40977 17 40976 18 40979 19 40978 20 40981 21 40980 22 40983 23 40982
-; CHECK-NEXT: 24 40985 25 40984 26 40987 27 40986 28 40989 29 40988 30 40991 31 40990
-; CHECK-NEXT: 32 40993 33 40992 34 40995 35 40994 36 40997 37 40996 38 40999 39 40998
-; CHECK-NEXT: 40 41001 41 41000 42 41003 43 41002 44 41005 45 41004 46 41007 47 41006
-; CHECK-NEXT: 48 41009 49 41008 50 41011 51 41010 52 41013 53 41012 54 41015 55 41014
-; CHECK-NEXT: 56 41017 57 41016 58 41019 59 41018 60 41021 61 41020 62 41023 63 41022
-; CHECK-NEXT: 64 41025 65 41024 66 41027 67 41026 68 41029 69 41028 70 41031 71 41030
-; CHECK-NEXT: 72 41033 73 41032 74 41035 75 41034 76 41037 77 41036 78 41039 79 41038
-; CHECK-NEXT: 80 41041 81 41040 82 41043 83 41042 84 41045 85 41044 86 41047 87 41046
-; CHECK-NEXT: 88 41049 89 41048 90 41051 91 41050 92 41053 93 41052 94 41055 95 41054
-; CHECK-NEXT: 96 41057 97 41056 98 41059 99 41058 100 41061 101 41060 102 41063 103 41062
-; CHECK-NEXT: 104 41065 105 41064 106 41067 107 41066 108 41069 109 41068 110 41071 111 41070
-; CHECK-NEXT: 112 41073 113 41072 114 41075 115 41074 116 41077 117 41076 118 41079 119 41078
-; CHECK-NEXT: 120 41081 121 41080 122 41083 123 41082 124 41085 125 41084 126 41087 127 41086
+; CHECK-NEXT: 0 49345 49537 320 49921 960 640 49729 50689 1728 1920 51009 1280 50625 50305 1088
+; CHECK-NEXT: 52225 3264 3456 52545 3840 53185 52865 3648 2560 51905 52097 2880 51457 2496 2176 51265
+; CHECK-NEXT: 55297 6336 6528 55617 6912 56257 55937 6720 7680 57025 57217 8000 56577 7616 7296 56385
+; CHECK-NEXT: 5120 54465 54657 5440 55041 6080 5760 54849 53761 4800 4992 54081 4352 53697 53377 4160
+; CHECK-NEXT: 61441 12480 12672 61761 13056 62401 62081 12864 13824 63169 63361 14144 62721 13760 13440 62529
+; CHECK-NEXT: 15360 64705 64897 15680 65281 16320 16000 65089 64001 15040 15232 64321 14592 63937 63617 14400
+; CHECK-NEXT: 10240 59585 59777 10560 60161 11200 10880 59969 60929 11968 12160 61249 11520 60865 60545 11328
+; CHECK-NEXT: 58369 9408 9600 58689 9984 59329 59009 9792 8704 58049 58241 9024 57601 8640 8320 57409
+; CHECK-NEXT: 40961 24768 24960 41281 25344 41921 41601 25152 26112 42689 42881 26432 42241 26048 25728 42049
+; CHECK-NEXT: 27648 44225 44417 27968 44801 28608 28288 44609 43521 27328 27520 43841 26880 43457 43137 26688
+; CHECK-NEXT: 30720 47297 47489 31040 47873 31680 31360 47681 48641 32448 32640 48961 32000 48577 48257 31808
+; CHECK-NEXT: 46081 29888 30080 46401 30464 47041 46721 30272 29184 45761 45953 29504 45313 29120 28800 45121
+; CHECK-NEXT: 20480 37057 37249 20800 37633 21440 21120 37441 38401 22208 22400 38721 21760 38337 38017 21568
+; CHECK-NEXT: 39937 23744 23936 40257 24320 40897 40577 24128 23040 39617 39809 23360 39169 22976 22656 38977
+; CHECK-NEXT: 34817 18624 18816 35137 19200 35777 35457 19008 19968 36545 36737 20288 36097 19904 19584 35905
+; CHECK-NEXT: 17408 33985 34177 17728 34561 18368 18048 34369 33281 17088 17280 33601 16640 33217 32897 16448
;
entry:
br label %loop
@@ -57,22 +57,22 @@ define i16 @crc16.le.tc8.udiv(i8 %msg, i16 %checksum) {
; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
; CHECK-NEXT: Auxiliary data: i8 %msg
; CHECK-NEXT: Computed CRC lookup table:
-; CHECK-NEXT: 0 40961 1 40960 2 40963 3 40962 4 40965 5 40964 6 40967 7 40966
-; CHECK-NEXT: 8 40969 9 40968 10 40971 11 40970 12 40973 13 40972 14 40975 15 40974
-; CHECK-NEXT: 16 40977 17 40976 18 40979 19 40978 20 40981 21 40980 22 40983 23 40982
-; CHECK-NEXT: 24 40985 25 40984 26 40987 27 40986 28 40989 29 40988 30 40991 31 40990
-; CHECK-NEXT: 32 40993 33 40992 34 40995 35 40994 36 40997 37 40996 38 40999 39 40998
-; CHECK-NEXT: 40 41001 41 41000 42 41003 43 41002 44 41005 45 41004 46 41007 47 41006
-; CHECK-NEXT: 48 41009 49 41008 50 41011 51 41010 52 41013 53 41012 54 41015 55 41014
-; CHECK-NEXT: 56 41017 57 41016 58 41019 59 41018 60 41021 61 41020 62 41023 63 41022
-; CHECK-NEXT: 64 41025 65 41024 66 41027 67 41026 68 41029 69 41028 70 41031 71 41030
-; CHECK-NEXT: 72 41033 73 41032 74 41035 75 41034 76 41037 77 41036 78 41039 79 41038
-; CHECK-NEXT: 80 41041 81 41040 82 41043 83 41042 84 41045 85 41044 86 41047 87 41046
-; CHECK-NEXT: 88 41049 89 41048 90 41051 91 41050 92 41053 93 41052 94 41055 95 41054
-; CHECK-NEXT: 96 41057 97 41056 98 41059 99 41058 100 41061 101 41060 102 41063 103 41062
-; CHECK-NEXT: 104 41065 105 41064 106 41067 107 41066 108 41069 109 41068 110 41071 111 41070
-; CHECK-NEXT: 112 41073 113 41072 114 41075 115 41074 116 41077 117 41076 118 41079 119 41078
-; CHECK-NEXT: 120 41081 121 41080 122 41083 123 41082 124 41085 125 41084 126 41087 127 41086
+; CHECK-NEXT: 0 49345 49537 320 49921 960 640 49729 50689 1728 1920 51009 1280 50625 50305 1088
+; CHECK-NEXT: 52225 3264 3456 52545 3840 53185 52865 3648 2560 51905 52097 2880 51457 2496 2176 51265
+; CHECK-NEXT: 55297 6336 6528 55617 6912 56257 55937 6720 7680 57025 57217 8000 56577 7616 7296 56385
+; CHECK-NEXT: 5120 54465 54657 5440 55041 6080 5760 54849 53761 4800 4992 54081 4352 53697 53377 4160
+; CHECK-NEXT: 61441 12480 12672 61761 13056 62401 62081 12864 13824 63169 63361 14144 62721 13760 13440 62529
+; CHECK-NEXT: 15360 64705 64897 15680 65281 16320 16000 65089 64001 15040 15232 64321 14592 63937 63617 14400
+; CHECK-NEXT: 10240 59585 59777 10560 60161 11200 10880 59969 60929 11968 12160 61249 11520 60865 60545 11328
+; CHECK-NEXT: 58369 9408 9600 58689 9984 59329 59009 9792 8704 58049 58241 9024 57601 8640 8320 57409
+; CHECK-NEXT: 40961 24768 24960 41281 25344 41921 41601 25152 26112 42689 42881 26432 42241 26048 25728 42049
+; CHECK-NEXT: 27648 44225 44417 27968 44801 28608 28288 44609 43521 27328 27520 43841 26880 43457 43137 26688
+; CHECK-NEXT: 30720 47297 47489 31040 47873 31680 31360 47681 48641 32448 32640 48961 32000 48577 48257 31808
+; CHECK-NEXT: 46081 29888 30080 46401 30464 47041 46721 30272 29184 45761 45953 29504 45313 29120 28800 45121
+; CHECK-NEXT: 20480 37057 37249 20800 37633 21440 21120 37441 38401 22208 22400 38721 21760 38337 38017 21568
+; CHECK-NEXT: 39937 23744 23936 40257 24320 40897 40577 24128 23040 39617 39809 23360 39169 22976 22656 38977
+; CHECK-NEXT: 34817 18624 18816 35137 19200 35777 35457 19008 19968 36545 36737 20288 36097 19904 19584 35905
+; CHECK-NEXT: 17408 33985 34177 17728 34561 18368 18048 34369 33281 17088 17280 33601 16640 33217 32897 16448
;
entry:
br label %loop
@@ -105,22 +105,22 @@ define i16 @crc16.le.tc16(i16 %msg, i16 %checksum) {
; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
; CHECK-NEXT: Auxiliary data: i16 %msg
; CHECK-NEXT: Computed CRC lookup table:
-; CHECK-NEXT: 0 40961 1 40960 2 40963 3 40962 4 40965 5 40964 6 40967 7 40966
-; CHECK-NEXT: 8 40969 9 40968 10 40971 11 40970 12 40973 13 40972 14 40975 15 40974
-; CHECK-NEXT: 16 40977 17 40976 18 40979 19 40978 20 40981 21 40980 22 40983 23 40982
-; CHECK-NEXT: 24 40985 25 40984 26 40987 27 40986 28 40989 29 40988 30 40991 31 40990
-; CHECK-NEXT: 32 40993 33 40992 34 40995 35 40994 36 40997 37 40996 38 40999 39 40998
-; CHECK-NEXT: 40 41001 41 41000 42 41003 43 41002 44 41005 45 41004 46 41007 47 41006
-; CHECK-NEXT: 48 41009 49 41008 50 41011 51 41010 52 41013 53 41012 54 41015 55 41014
-; CHECK-NEXT: 56 41017 57 41016 58 41019 59 41018 60 41021 61 41020 62 41023 63 41022
-; CHECK-NEXT: 64 41025 65 41024 66 41027 67 41026 68 41029 69 41028 70 41031 71 41030
-; CHECK-NEXT: 72 41033 73 41032 74 41035 75 41034 76 41037 77 41036 78 41039 79 41038
-; CHECK-NEXT: 80 41041 81 41040 82 41043 83 41042 84 41045 85 41044 86 41047 87 41046
-; CHECK-NEXT: 88 41049 89 41048 90 41051 91 41050 92 41053 93 41052 94 41055 95 41054
-; CHECK-NEXT: 96 41057 97 41056 98 41059 99 41058 100 41061 101 41060 102 41063 103 41062
-; CHECK-NEXT: 104 41065 105 41064 106 41067 107 41066 108 41069 109 41068 110 41071 111 41070
-; CHECK-NEXT: 112 41073 113 41072 114 41075 115 41074 116 41077 117 41076 118 41079 119 41078
-; CHECK-NEXT: 120 41081 121 41080 122 41083 123 41082 124 41085 125 41084 126 41087 127 41086
+; CHECK-NEXT: 0 49345 49537 320 49921 960 640 49729 50689 1728 1920 51009 1280 50625 50305 1088
+; CHECK-NEXT: 52225 3264 3456 52545 3840 53185 52865 3648 2560 51905 52097 2880 51457 2496 2176 51265
+; CHECK-NEXT: 55297 6336 6528 55617 6912 56257 55937 6720 7680 57025 57217 8000 56577 7616 7296 56385
+; CHECK-NEXT: 5120 54465 54657 5440 55041 6080 5760 54849 53761 4800 4992 54081 4352 53697 53377 4160
+; CHECK-NEXT: 61441 12480 12672 61761 13056 62401 62081 12864 13824 63169 63361 14144 62721 13760 13440 62529
+; CHECK-NEXT: 15360 64705 64897 15680 65281 16320 16000 65089 64001 15040 15232 64321 14592 63937 63617 14400
+; CHECK-NEXT: 10240 59585 59777 10560 60161 11200 10880 59969 60929 11968 12160 61249 11520 60865 60545 11328
+; CHECK-NEXT: 58369 9408 9600 58689 9984 59329 59009 9792 8704 58049 58241 9024 57601 8640 8320 57409
+; CHECK-NEXT: 40961 24768 24960 41281 25344 41921 41601 25152 26112 42689 42881 26432 42241 26048 25728 42049
+; CHECK-NEXT: 27648 44225 44417 27968 44801 28608 28288 44609 43521 27328 27520 43841 26880 43457 43137 26688
+; CHECK-NEXT: 30720 47297 47489 31040 47873 31680 31360 47681 48641 32448 32640 48961 32000 48577 48257 31808
+; CHECK-NEXT: 46081 29888 30080 46401 30464 47041 46721 30272 29184 45761 45953 29504 45313 29120 28800 45121
+; CHECK-NEXT: 20480 37057 37249 20800 37633 21440 21120 37441 38401 22208 22400 38721 21760 38337 38017 21568
+; CHECK-NEXT: 39937 23744 23936 40257 24320 40897 40577 24128 23040 39617 39809 23360 39169 22976 22656 38977
+; CHECK-NEXT: 34817 18624 18816 35137 19200 35777 35457 19008 19968 36545 36737 20288 36097 19904 19584 35905
+; CHECK-NEXT: 17408 33985 34177 17728 34561 18368 18048 34369 33281 17088 17280 33601 16640 33217 32897 16448
;
entry:
br label %loop
@@ -340,22 +340,22 @@ define i32 @crc32.le.tc8.data32(i32 %checksum, i32 %msg) {
; CHECK-NEXT: Computed CRC: %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
; CHECK-NEXT: Auxiliary data: i32 %msg
; CHECK-NEXT: Computed CRC lookup table:
-; CHECK-NEXT: 0 33800 1 33801 2 33802 3 33803 4 33804 5 33805 6 33806 7 33807
-; CHECK-NEXT: 8 33792 9 33793 10 33794 11 33795 12 33796 13 33797 14 33798 15 33799
-; CHECK-NEXT: 16 33816 17 33817 18 33818 19 33819 20 33820 21 33821 22 33822 23 33823
-; CHECK-NEXT: 24 33808 25 33809 26 33810 27 33811 28 33812 29 33813 30 33814 31 33815
-; CHECK-NEXT: 32 33832 33 33833 34 33834 35 33835 36 33836 37 33837 38 33838 39 33839
-; CHECK-NEXT: 40 33824 41 33825 42 33826 43 33827 44 33828 45 33829 46 33830 47 33831
-; CHECK-NEXT: 48 33848 49 33849 50 33850 51 33851 52 33852 53 33853 54 33854 55 33855
-; CHECK-NEXT: 56 33840 57 33841 58 33842 59 33843 60 33844 61 33845 62 33846 63 33847
-; CHECK-NEXT: 64 33864 65 33865 66 33866 67 33867 68 33868 69 33869 70 33870 71 33871
-; CHECK-NEXT: 72 33856 73 33857 74 33858 75 33859 76 33860 77 33861 78 33862 79 33863
-; CHECK-NEXT: 80 33880 81 33881 82 33882 83 33883 84 33884 85 33885 86 33886 87 33887
-; CHECK-NEXT: 88 33872 89 33873 90 33874 91 33875 92 33876 93 33877 94 33878 95 33879
-; CHECK-NEXT: 96 33896 97 33897 98 33898 99 33899 100 33900 101 33901 102 33902 103 33903
-; CHECK-NEXT: 104 33888 105 33889 106 33890 107 33891 108 33892 109 33893 110 33894 111 33895
-; CHECK-NEXT: 112 33912 113 33913 114 33914 115 33915 116 33916 117 33917 118 33918 119 33919
-; CHECK-NEXT: 120 33904 121 33905 122 33906 123 33907 124 33908 125 33909 126 33910 127 33911
+; CHECK-NEXT: 0 4489 8978 12955 17956 22445 25910 29887 35912 40385 44890 48851 51820 56293 59774 63735
+; CHECK-NEXT: 4225 264 13203 8730 22181 18220 30135 25662 40137 36160 49115 44626 56045 52068 63999 59510
+; CHECK-NEXT: 8450 12427 528 5017 26406 30383 17460 21949 44362 48323 36440 40913 60270 64231 51324 55797
+; CHECK-NEXT: 12675 8202 4753 792 30631 26158 21685 17724 48587 44098 40665 36688 64495 60006 55549 51572
+; CHECK-NEXT: 16900 21389 24854 28831 1056 5545 10034 14011 52812 57285 60766 64727 34920 39393 43898 47859
+; CHECK-NEXT: 21125 17164 29079 24606 5281 1320 14259 9786 57037 53060 64991 60502 39145 35168 48123 43634
+; CHECK-NEXT: 25350 29327 16404 20893 9506 13483 1584 6073 61262 65223 52316 56789 43370 47331 35448 39921
+; CHECK-NEXT: 29575 25102 20629 16668 13731 9258 5809 1848 65487 60998 56541 52564 47595 43106 39673 35696
+; CHECK-NEXT: 33800 38273 42778 46739 49708 54181 57662 61623 2112 6601 11090 15067 20068 24557 28022 31999
+; CHECK-NEXT: 38025 34048 47003 42514 53933 49956 61887 57398 6337 2376 15315 10842 24293 20332 32247 27774
+; CHECK-NEXT: 42250 46211 34328 38801 58158 62119 49212 53685 10562 14539 2640 7129 28518 32495 19572 24061
+; CHECK-NEXT: 46475 41986 38553 34576 62383 57894 53437 49460 14787 10314 6865 2904 32743 28270 23797 19836
+; CHECK-NEXT: 50700 55173 58654 62615 32808 37281 41786 45747 19012 23501 26966 30943 3168 7657 12146 16123
+; CHECK-NEXT: 54925 50948 62879 58390 37033 33056 46011 41522 23237 19276 31191 26718 7393 3432 16371 11898
+; CHECK-NEXT: 59150 63111 50204 54677 41258 45219 33336 37809 27462 31439 18516 23005 11618 15595 3696 8185
+; CHECK-NEXT: 63375 58886 54429 50452 45483 40994 37561 33584 31687 27214 22741 18780 15843 11370 7921 3960
;
entry:
br label %loop
>From 0449f97ffd7daad64bcab36120d2712880515daf Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 21 May 2025 14:05:22 +0100
Subject: [PATCH 5/5] [HashRecognize] Put recurrence-finders inside struct
---
llvm/lib/Analysis/HashRecognize.cpp | 210 +++++++++++++++-------------
1 file changed, 114 insertions(+), 96 deletions(-)
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index 665dec8d751b0..63e173f01e2cb 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -263,12 +263,74 @@ ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) {
return KnownPhis;
}
-/// Digs for a recurrence starting with \p V hitting the PHI node \p P in a
-/// use-def chain. Used by matchConditionalRecurrence.
-static BinaryOperator *
-digRecurrence(Instruction *V, const PHINode *P, const Loop &L,
- const APInt *&ExtraConst,
- Instruction::BinaryOps BOWithConstOpToMatch) {
+/// Holds either a Simple Recurrence or a Conditional Recurrence, together with
+/// the member functions that match them. In a Simple Recurrence, Step is an
+/// operand of the BO; in a Conditional Recurrence, Step is a SelectInst.
+struct RecurrenceInfo {
+ /// Loop supplied at construction.
+ const Loop &L;
+ /// PHI node most recently handed to one of the matchers.
+ const PHINode *Phi = nullptr;
+ /// Recurrent binary operator; null until a matcher binds it.
+ BinaryOperator *BO = nullptr;
+ Value *Start = nullptr;
+ Value *Step = nullptr;
+ /// Constant operand bound by digRecurrence, when one was matched.
+ std::optional<APInt> ExtraConst;
+
+ // Explicit, so that a Loop never converts to a RecurrenceInfo implicitly.
+ explicit RecurrenceInfo(const Loop &L) : L(L) {}
+
+ /// True when BO is non-null. Explicit, so that a RecurrenceInfo is usable as
+ /// a condition but never silently converts to an integer.
+ explicit operator bool() const { return BO != nullptr; }
+
+ /// Prints every field to \p OS, each line indented by \p Indent. Phi, BO,
+ /// Start and Step are dereferenced unchecked, so all four must be non-null;
+ /// ExtraConst is printed only when present.
+ void print(raw_ostream &OS, unsigned Indent) const {
+ OS.indent(Indent) << "Phi: ";
+ Phi->print(OS);
+ OS << "\n";
+ OS.indent(Indent) << "BinaryOperator: ";
+ BO->print(OS);
+ OS << "\n";
+ OS.indent(Indent) << "Start: ";
+ Start->print(OS);
+ OS << "\n";
+ OS.indent(Indent) << "Step: ";
+ Step->print(OS);
+ OS << "\n";
+ if (ExtraConst) {
+ OS.indent(Indent) << "ExtraConst: ";
+ ExtraConst->print(OS, false);
+ OS << "\n";
+ }
+ }
+
+ bool matchSimpleRecurrence(const PHINode *P);
+ BinaryOperator *digRecurrence(
+ Instruction *V, const PHINode *P,
+ Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
+ bool matchConditionalRecurrence(
+ const PHINode *P,
+ Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd);
+};
+
+/// Thin wrapper over llvm::matchSimpleRecurrence: records \p P as Phi and lets
+/// the wrapped matcher bind BO, Start and Step. The shape being matched is a
+/// simple first order recurrence cycle whose BinOp takes the PHI as either
+/// operand:
+///
+/// loop:
+/// %rec = phi [%start, %entry], [%BO, %loop]
+/// ...
+/// %BO = binop %rec, %step
+///
+/// or, with the operands flipped:
+///
+/// loop:
+/// %rec = phi [%start, %entry], [%BO, %loop]
+/// ...
+/// %BO = binop %step, %rec
+///
+/// \returns the result of llvm::matchSimpleRecurrence for \p P.
+bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) {
+ // Phi is recorded whether or not the match succeeds.
+ Phi = P;
+ const bool Matched = llvm::matchSimpleRecurrence(P, BO, Start, Step);
+ return Matched;
+}
+
+/// Digs for a recurrence starting with \p V hitting the PHI node in a use-def
+/// chain. Used by matchConditionalRecurrence.
+BinaryOperator *
+RecurrenceInfo::digRecurrence(Instruction *V, const PHINode *P,
+ Instruction::BinaryOps BOWithConstOpToMatch) {
SmallVector<Instruction *> Worklist;
Worklist.push_back(V);
while (!Worklist.empty()) {
@@ -280,14 +342,16 @@ digRecurrence(Instruction *V, const PHINode *P, const Loop &L,
// Find a recurrence over a BinOp, by matching either of its operands
// with the PHINode.
- if (match(I, m_c_BinOp(m_Value(), m_Specific(P))))
+ if (match(I, m_c_BinOp(m_Value(), m_Specific(Phi))))
return cast<BinaryOperator>(I);
// Bind to ExtraConst, if we match exactly one.
if (I->getOpcode() == BOWithConstOpToMatch) {
if (ExtraConst)
return nullptr;
- match(I, m_c_BinOp(m_APInt(ExtraConst), m_Value()));
+ const APInt *C = nullptr;
+ if (match(I, m_c_BinOp(m_APInt(C), m_Value())))
+ ExtraConst = *C;
}
// Continue along the use-def chain.
@@ -314,16 +378,15 @@ digRecurrence(Instruction *V, const PHINode *P, const Loop &L,
/// matched, and \p ExtraConst is a constant operand of that BinOp. This
/// peculiarity exists, because in a CRC algorithm, the \p BOWithConstOpToMatch
/// is an XOR, and the \p ExtraConst ends up being the generating polynomial.
-static bool matchConditionalRecurrence(
- const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step,
- const Loop &L, const APInt *&ExtraConst,
- Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd) {
- if (P->getNumIncomingValues() != 2)
+bool RecurrenceInfo::matchConditionalRecurrence(
+ const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) {
+ Phi = P;
+ if (Phi->getNumIncomingValues() != 2)
return false;
for (unsigned Idx = 0; Idx != 2; ++Idx) {
- Value *FoundStep = P->getIncomingValue(Idx);
- Value *FoundStart = P->getIncomingValue(!Idx);
+ Value *FoundStep = Phi->getIncomingValue(Idx);
+ Value *FoundStart = Phi->getIncomingValue(!Idx);
Instruction *TV, *FV;
if (!match(FoundStep,
@@ -332,12 +395,8 @@ static bool matchConditionalRecurrence(
// For a conditional recurrence, both the true and false values of the
// select must ultimately end up in the same recurrent BinOp.
- ExtraConst = nullptr;
- BinaryOperator *FoundBO =
- digRecurrence(TV, P, L, ExtraConst, BOWithConstOpToMatch);
- BinaryOperator *AltBO =
- digRecurrence(FV, P, L, ExtraConst, BOWithConstOpToMatch);
-
+ BinaryOperator *FoundBO = digRecurrence(TV, P, BOWithConstOpToMatch);
+ BinaryOperator *AltBO = digRecurrence(FV, P, BOWithConstOpToMatch);
if (!FoundBO || FoundBO != AltBO)
return false;
@@ -355,69 +414,27 @@ static bool matchConditionalRecurrence(
return false;
}
-/// A structure that can hold either a Simple Recurrence or a Conditional
-/// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand
-/// of the BO, while in a Conditional Recurrence, it is a SelectInst.
-struct RecurrenceInfo {
- PHINode *Phi;
- BinaryOperator *BO;
- Value *Start;
- Value *Step;
- std::optional<APInt> ExtraConst;
-
- RecurrenceInfo(PHINode *Phi, BinaryOperator *BO, Value *Start, Value *Step,
- std::optional<APInt> ExtraConst = std::nullopt)
- : Phi(Phi), BO(BO), Start(Start), Step(Step), ExtraConst(ExtraConst) {}
-
- void print(raw_ostream &OS, unsigned Indent) const {
- OS.indent(Indent) << "Phi: ";
- Phi->print(OS);
- OS << "\n";
- OS.indent(Indent) << "BinaryOperator: ";
- BO->print(OS);
- OS << "\n";
- OS.indent(Indent) << "Start: ";
- Start->print(OS);
- OS << "\n";
- OS.indent(Indent) << "Step: ";
- Step->print(OS);
- OS << "\n";
- if (ExtraConst) {
- OS.indent(Indent) << "ExtraConst: ";
- ExtraConst->print(OS, false);
- OS << "\n";
- }
- }
-};
-
-/// Iterates over all the phis in \p LoopLatch, and attempts to extract a Simple
-/// Recurrence, and a Conditional Recurrence.
-static std::pair<std::optional<RecurrenceInfo>, std::optional<RecurrenceInfo>>
+/// Iterates over all the phis in \p LoopLatch, skipping \p IndVar, and attempts
+/// to extract a Conditional Recurrence and an optional Simple Recurrence.
+///
+/// \returns std::nullopt if the latch does not have exactly two or three phis,
+/// or if it has three phis and either recurrence was left unmatched (one of
+/// the phis is then stray). Otherwise returns the pair (Simple, Conditional);
+/// either member may still be unmatched, so callers must test each one.
+static std::optional<std::pair<RecurrenceInfo, RecurrenceInfo>>
getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) {
- std::optional<RecurrenceInfo> SimpleRecurrence, ConditionalRecurrence;
- for (PHINode &P : LoopLatch->phis()) {
+ auto Phis = LoopLatch->phis();
+ unsigned NumPhis = std::distance(Phis.begin(), Phis.end());
+ if (NumPhis != 2 && NumPhis != 3)
+ return {};
+
+ RecurrenceInfo SimpleRecurrence(L);
+ RecurrenceInfo ConditionalRecurrence(L);
+ for (PHINode &P : Phis) {
if (&P == IndVar)
continue;
- if (!P.getType()->isIntegerTy()) {
- LLVM_DEBUG(dbgs() << "HashRecognize: Non-integral PHI found\n");
- return {};
- }
-
- BinaryOperator *BO;
- Value *Start, *Step;
- const APInt *GenPoly;
- if (!SimpleRecurrence && matchSimpleRecurrence(&P, BO, Start, Step)) {
- SimpleRecurrence = {&P, BO, Start, Step};
- } else if (!ConditionalRecurrence &&
- matchConditionalRecurrence(&P, BO, Start, Step, L, GenPoly,
- Instruction::BinaryOps::Xor)) {
- ConditionalRecurrence = {&P, BO, Start, Step, *GenPoly};
- } else {
- LLVM_DEBUG(dbgs() << "HashRecognize: Stray PHI found: " << P << "\n");
- return {};
- }
+ if (!SimpleRecurrence)
+ SimpleRecurrence.matchSimpleRecurrence(&P);
+ if (!ConditionalRecurrence)
+ ConditionalRecurrence.matchConditionalRecurrence(
+ &P, Instruction::BinaryOps::Xor);
}
- return {SimpleRecurrence, ConditionalRecurrence};
+
+ // With three phis, both recurrences have to be matched. If either is
+ // missing, some PHI was claimed by neither matcher, and silently ignoring
+ // it would let a loop that computes something else be reported as a hash.
+ if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence))
+ return {};
+ return std::make_pair(SimpleRecurrence, ConditionalRecurrence);
}
PolynomialInfo::PolynomialInfo(unsigned TripCount, const Value *LHS,
@@ -541,47 +558,48 @@ HashRecognize::recognizeCRC() const {
if (!Latch || !Exit || !IndVar)
return "Loop not in canonical form";
- auto [SimpleRecurrence, ConditionalRecurrence] =
- getRecurrences(Latch, IndVar, L);
-
+ auto R = getRecurrences(Latch, IndVar, L);
+ if (!R)
+ return "Found stray PHI";
+ auto [SimpleRecurrence, ConditionalRecurrence] = *R;
if (!ConditionalRecurrence)
return "Unable to find conditional recurrence";
// Make sure that all recurrences are either all SCEVMul with two or SCEVDiv
// with two, or in other words, that they're single bit-shifts.
std::optional<bool> ByteOrderSwapped =
- isBigEndianBitShift(SE.getSCEV(ConditionalRecurrence->BO));
+ isBigEndianBitShift(SE.getSCEV(ConditionalRecurrence.BO));
if (!ByteOrderSwapped)
return "Loop with non-unit bitshifts";
if (SimpleRecurrence) {
- if (isBigEndianBitShift(SE.getSCEV(SimpleRecurrence->BO)) !=
+ if (isBigEndianBitShift(SE.getSCEV(SimpleRecurrence.BO)) !=
ByteOrderSwapped)
return "Loop with non-unit bitshifts";
- if (!arePHIsIntertwined(SimpleRecurrence->Phi, ConditionalRecurrence->Phi,
- L, Instruction::BinaryOps::Xor))
+ if (!arePHIsIntertwined(SimpleRecurrence.Phi, ConditionalRecurrence.Phi, L,
+ Instruction::BinaryOps::Xor))
return "Simple recurrence doesn't use conditional recurrence with XOR";
}
// Make sure that the computed value is used in the exit block: this should be
// true even if it is only really used in an outer loop's exit block, since
// the loop is in LCSSA form.
- auto *ComputedValue = cast<SelectInst>(ConditionalRecurrence->Step);
+ auto *ComputedValue = cast<SelectInst>(ConditionalRecurrence.Step);
if (none_of(ComputedValue->users(), [Exit](User *U) {
auto *UI = dyn_cast<Instruction>(U);
return UI && UI->getParent() == Exit;
}))
return "Unable to find use of computed value in loop exit block";
- assert(ConditionalRecurrence->ExtraConst &&
+ assert(ConditionalRecurrence.ExtraConst &&
"Expected ExtraConst in conditional recurrence");
- const APInt &GenPoly = *ConditionalRecurrence->ExtraConst;
+ const APInt &GenPoly = *ConditionalRecurrence.ExtraConst;
// PhiEvolutions are pairs of PHINodes along with their incoming value from
// within the loop, which we term as their step.
SmallVector<PhiStepPair, 2> PhiEvolutions;
- PhiEvolutions.emplace_back(ConditionalRecurrence->Phi, ComputedValue);
+ PhiEvolutions.emplace_back(ConditionalRecurrence.Phi, ComputedValue);
if (SimpleRecurrence)
- PhiEvolutions.emplace_back(SimpleRecurrence->Phi, SimpleRecurrence->BO);
+ PhiEvolutions.emplace_back(SimpleRecurrence.Phi, SimpleRecurrence.BO);
ValueEvolution VE(TC, *ByteOrderSwapped);
std::optional<KnownPhiMap> KnownPhis = VE.computeEvolutions(PhiEvolutions);
@@ -589,14 +607,14 @@ HashRecognize::recognizeCRC() const {
if (VE.hasError())
return VE.getError();
- KnownBits ResultBits = KnownPhis->at(ConditionalRecurrence->Phi);
+ KnownBits ResultBits = KnownPhis->at(ConditionalRecurrence.Phi);
auto IsZero = [](const KnownBits &K) { return K.isZero(); };
if (!checkExtractBits(ResultBits, TC, IsZero, *ByteOrderSwapped))
return ErrBits(ResultBits, TC, *ByteOrderSwapped);
- const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence->Start : nullptr;
- return PolynomialInfo(TC, ConditionalRecurrence->Start, GenPoly,
- ComputedValue, *ByteOrderSwapped, LHSAux);
+ const Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr;
+ return PolynomialInfo(TC, ConditionalRecurrence.Start, GenPoly, ComputedValue,
+ *ByteOrderSwapped, LHSAux);
}
void CRCTable::print(raw_ostream &OS) const {
More information about the llvm-commits
mailing list