[llvm] [HashRecognize] Check TC against bitwidth of LHSAux (PR #144881)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 19 04:44:48 PDT 2025


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/144881

The trip-count of a CRC algorithm can legitimately be greater than the bitwidth of the result: what it cannot exceed is the bitwidth of the data, or LHSAux. crc8.le.tc16 is now successfully recognized as a CRC algorithm.

-- 8< --
Based on https://github.com/llvm/llvm-project/pull/144878.

>From 05f587a71518fb6862f2313e6162658aeb2dd053 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 19 Jun 2025 11:29:47 +0100
Subject: [PATCH 1/3] [HashRecognize] Pre-commit tests for PHIsIntertwined

---
 .../HashRecognize/cyclic-redundancy-check.ll  | 58 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 0366684a13b54..1fa76cb23173b 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -144,6 +144,34 @@ exit:                                              ; preds = %loop
   ret i16 %crc.next
 }
 
+define i8 @crc8.le.tc16(i16 %msg, i8 %checksum) {
+; CHECK-LABEL: 'crc8.le.tc16'
+; CHECK-NEXT:  Did not find a hash algorithm
+; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+;
+entry:
+  br label %loop
+
+loop:                                              ; preds = %loop, %entry
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i8 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+  %data.trunc = trunc i16 %data to i8
+  %xor.crc.data = xor i8 %crc, %data.trunc
+  %and.crc.data = and i8 %xor.crc.data, 1
+  %data.next = lshr i16 %data, 1
+  %check.sb = icmp eq i8 %and.crc.data, 0
+  %crc.lshr = lshr i8 %crc, 1
+  %crc.xor = xor i8 %crc.lshr, 29
+  %crc.next = select i1 %check.sb, i8 %crc.lshr, i8 %crc.xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %exit.cond = icmp samesign ult i8 %iv, 15
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:                                              ; preds = %loop
+  ret i8 %crc.next
+}
+
 define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) {
 ; CHECK-LABEL: 'crc16.be.tc8.crc.init.li'
 ; CHECK-NEXT:  Found big-endian CRC-16 loop with trip count 8
@@ -610,9 +638,8 @@ loop:                                              ; preds = %loop, %entry
   %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
   %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
   %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
-  %crc.lshr = lshr i16 %crc, 8
   %data.ext = zext i8 %data to i16
-  %xor.crc.data = xor i16 %crc.lshr, %data.ext
+  %xor.crc.data = xor i16 %crc, %data.ext
   %check.sb = icmp samesign ult i16 %xor.crc.data, 128
   %crc.shl = shl i16 %crc, 1
   %crc.xor = xor i16 %crc.shl, 258
@@ -838,6 +865,33 @@ exit:                                              ; preds = %loop
   ret i16 %crc.next
 }
 
+define i16 @not.crc.bad.cast(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc.bad.cast'
+; CHECK-NEXT:  Did not find a hash algorithm
+; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+;
+entry:
+  br label %loop
+
+loop:                                              ; preds = %loop, %entry
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+  %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data.ext = zext i8 %data to i16
+  %xor.crc.data = xor i16 %crc, %data.ext
+  %check.sb = icmp slt i16 %xor.crc.data, 0
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 29
+  %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
+  %data.next = shl i8 %data, 1
+  %iv.next = add nuw nsw i8 %iv, 1
+  %exit.cond = icmp samesign ult i8 %iv, 7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:                                              ; preds = %loop
+  ret i16 %crc.next
+}
+
 define i32 @not.crc.dead.msg.bad.use(i32 %checksum, i32 %msg) {
 ; CHECK-LABEL: 'not.crc.dead.msg.bad.use'
 ; CHECK-NEXT:  Did not find a hash algorithm

>From 201d906ad6699fb1b3e1f2eba26e3cba1af2bd10 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 19 Jun 2025 12:07:55 +0100
Subject: [PATCH 2/3] [HashRecognize] Rewrite arePHIsIntertwined

The test crc8.le.tc16 is a valid CRC algorithm, but isn't recognized as
such due to a buggy arePHIsIntertwined, which is asymmetric in its PHI
node arguments. Another issue is that arePHIsIntertwined is
unnecessarily complicated: the core functionality is to match a BinOp
that's a recurrence in both PHI nodes, ignoring zext and trunc casts.
Hence, rewrite it for clarity, and make it symmetric in both PHI
operands.

crc8.le.tc16 is still not recognized as a valid CRC algorithm, due to an
incorrect check for loop iterations exceeding the bitwidth of the
result: in reality, it should not exceed the bitwidth of LHSAux, but we
leave this fix to a follow-up.
---
 llvm/include/llvm/IR/PatternMatch.h           |  7 ++
 llvm/lib/Analysis/HashRecognize.cpp           | 66 ++++++++-----------
 .../HashRecognize/cyclic-redundancy-check.ll  | 12 ++--
 3 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 2eaa7d0faabc1..2ff24a8956326 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2160,6 +2160,13 @@ m_ZExtOrSelf(const OpTy &Op) {
   return m_CombineOr(m_ZExt(Op), Op);
 }
 
+template <typename OpTy>
+inline match_combine_or<match_combine_or<CastInst_match<OpTy, ZExtInst>, OpTy>,
+                        CastInst_match<OpTy, TruncInst>>
+m_ZExtOrTruncOrSelf(const OpTy &Op) {
+  return m_CombineOr(m_ZExtOrSelf(Op), m_Trunc(Op));
+}
+
 template <typename OpTy>
 inline match_combine_or<CastInst_match<OpTy, SExtInst>, OpTy>
 m_SExtOrSelf(const OpTy &Op) {
diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index d11602f921872..ece53e03239d2 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -497,45 +497,37 @@ CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly,
   return Table;
 }
 
-/// Checks if \p Reference is reachable from \p Needle on the use-def chain, and
-/// that there are no stray PHI nodes while digging the use-def chain. \p
-/// BOToMatch is a CRC peculiarity: at least one of the Users of Needle needs to
-/// match this OpCode, which is XOR for CRC.
+/// Checks that \p Needle and \p Reference are used together in a
+/// BinaryOperator, which is a recurrence over both, ignoring zext and trunc.
+/// Additionally checks that the BinaryOperator has opcode \p BOToMatch, which
+/// is XOR in the case of CRC. In other words, it checks for the following
+/// pattern:
+///
+/// loop:
+///   %needle = phi [_, %entry], [%needle.next, %loop]
+///   %reference = phi [_, %entry], [%reference.next, %loop]
+///   ...
+///   _ = BOTOMatch ((trunc|zext|self) %needle) ((trunc|zext|self) %reference)
+///
 static bool arePHIsIntertwined(
     const PHINode *Needle, const PHINode *Reference, const Loop &L,
     Instruction::BinaryOps BOToMatch = Instruction::BinaryOpsEnd) {
-  // Initialize the worklist with Users of the Needle.
-  SmallVector<const Instruction *> Worklist;
-  for (const User *U : Needle->users()) {
-    if (auto *UI = dyn_cast<Instruction>(U))
-      if (L.contains(UI))
-        Worklist.push_back(UI);
-  }
-
-  // BOToMatch is usually XOR for CRC.
-  if (BOToMatch != Instruction::BinaryOpsEnd) {
-    if (count_if(Worklist, [BOToMatch](const Instruction *I) {
-          return I->getOpcode() == BOToMatch;
-        }) != 1)
-      return false;
-  }
-
-  while (!Worklist.empty()) {
-    const Instruction *I = Worklist.pop_back_val();
-
-    // Since Needle is never pushed onto the Worklist, I must either be the
-    // Reference PHI node (in which case we're done), or a stray PHI node (in
-    // which case we abort).
-    if (isa<PHINode>(I))
-      return I == Reference;
-
-    for (const Use &U : I->operands())
-      if (auto *UI = dyn_cast<Instruction>(U))
-        // Don't push Needle back onto the Worklist.
-        if (UI != Needle && L.contains(UI))
-          Worklist.push_back(UI);
-  }
-  return false;
+  return any_of(ArrayRef({Needle, Reference}), [&](const PHINode *S) {
+    return count_if(S->users(), [&](const User *U) {
+             auto *BO = dyn_cast<BinaryOperator>(U);
+             if (!BO)
+               return false;
+             if (BOToMatch != Instruction::BinaryOpsEnd &&
+                 BO->getOpcode() != BOToMatch)
+               return false;
+             return all_of(BO->operands(), [Needle, Reference](const Use &U) {
+               return match(
+                   U.get(),
+                   m_CombineOr(m_ZExtOrTruncOrSelf(m_Specific(Reference)),
+                               m_ZExtOrTruncOrSelf(m_Specific(Needle))));
+             });
+           }) == 1;
+  });
 }
 
 // Recognizes a multiplication or division by the constant two, using SCEV. By
@@ -588,7 +580,7 @@ HashRecognize::recognizeCRC() const {
       return "Loop with non-unit bitshifts";
     if (!arePHIsIntertwined(SimpleRecurrence.Phi, ConditionalRecurrence.Phi, L,
                             Instruction::BinaryOps::Xor))
-      return "Simple recurrence doesn't use conditional recurrence with XOR";
+      return "Recurrences not intertwined with XOR";
   }
 
   // Make sure that the computed value is used in the exit block: this should be
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 1fa76cb23173b..c42985037d83a 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -147,7 +147,7 @@ exit:                                              ; preds = %loop
 define i8 @crc8.le.tc16(i16 %msg, i8 %checksum) {
 ; CHECK-LABEL: 'crc8.le.tc16'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+; CHECK-NEXT:  Reason: Loop iterations exceed bitwidth of result
 ;
 entry:
   br label %loop
@@ -629,7 +629,7 @@ exit:                                              ; preds = %loop
 define i16 @not.crc.wrong.sb.check.const(i8 %msg, i16 %checksum) {
 ; CHECK-LABEL: 'not.crc.wrong.sb.check.const'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+; CHECK-NEXT:  Reason: Bad RHS of significant-bit-check
 ;
 entry:
   br label %loop
@@ -868,7 +868,7 @@ exit:                                              ; preds = %loop
 define i16 @not.crc.bad.cast(i8 %msg, i16 %checksum) {
 ; CHECK-LABEL: 'not.crc.bad.cast'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+; CHECK-NEXT:  Reason: Expected bottom 8 bits zero (????????00001011)
 ;
 entry:
   br label %loop
@@ -895,7 +895,7 @@ exit:                                              ; preds = %loop
 define i32 @not.crc.dead.msg.bad.use(i32 %checksum, i32 %msg) {
 ; CHECK-LABEL: 'not.crc.dead.msg.bad.use'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+; CHECK-NEXT:  Reason: Recurrences not intertwined with XOR
 ;
 entry:
   br label %loop
@@ -923,7 +923,7 @@ exit:                                              ; preds = %loop
 define i16 @not.crc.dead.msg.no.use(i8 %msg, i16 %checksum) {
 ; CHECK-LABEL: 'not.crc.dead.msg.no.use'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+; CHECK-NEXT:  Reason: Recurrences not intertwined with XOR
 ;
 entry:
   br label %loop
@@ -952,7 +952,7 @@ exit:                                              ; preds = %loop
 define i32 @not.crc.dead.msg.wrong.op(i32 %checksum, i32 %msg) {
 ; CHECK-LABEL: 'not.crc.dead.msg.wrong.op'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Simple recurrence doesn't use conditional recurrence with XOR
+; CHECK-NEXT:  Reason: Recurrences not intertwined with XOR
 ;
 entry:
   br label %loop

>From 21d97560e6208d9a58725182230febb85b9d28d7 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 19 Jun 2025 12:38:35 +0100
Subject: [PATCH 3/3] [HashRecognize] Check TC against bitwidth of LHSAux

The trip-count of a CRC algorithm can legitimiately be greater than the
bitwidth of the result: what it cannot exceed is the bitwidth of the
data, or LHSAux. crc8.le.tc16 is now successfully recognized as a CRC
algorithm.
---
 llvm/lib/Analysis/HashRecognize.cpp           | 29 ++++++++++---------
 .../HashRecognize/cyclic-redundancy-check.ll  | 26 +++++++++++++++--
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index ece53e03239d2..f10689875c774 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -247,16 +247,10 @@ KnownBits ValueEvolution::compute(const Value *V) {
 }
 
 bool ValueEvolution::computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions) {
-  for (unsigned I = 0; I < TripCount; ++I) {
-    for (auto [Phi, Step] : PhiEvolutions) {
-      KnownBits KnownAtIter = computeInstr(Step);
-      if (KnownAtIter.getBitWidth() < I + 1) {
-        ErrStr = "Loop iterations exceed bitwidth of result";
-        return false;
-      }
-      KnownPhis.emplace_or_assign(Phi, KnownAtIter);
-    }
-  }
+  for (unsigned I = 0; I < TripCount; ++I)
+    for (auto [Phi, Step] : PhiEvolutions)
+      KnownPhis.emplace_or_assign(Phi, computeInstr(Step));
+
   return ErrStr.empty();
 }
 
@@ -583,6 +577,13 @@ HashRecognize::recognizeCRC() const {
       return "Recurrences not intertwined with XOR";
   }
 
+  // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS.
+  Value *LHS = ConditionalRecurrence.Start;
+  Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr;
+  if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth()
+                   : LHS->getType()->getIntegerBitWidth()))
+    return "Loop iterations exceed bitwidth of data";
+
   // Make sure that the computed value is used in the exit block: this should be
   // true even if it is only really used in an outer loop's exit block, since
   // the loop is in LCSSA form.
@@ -612,12 +613,12 @@ HashRecognize::recognizeCRC() const {
   KnownBits ResultBits = VE.KnownPhis.at(ConditionalRecurrence.Phi);
 
   auto IsZero = [](const KnownBits &K) { return K.isZero(); };
-  if (!checkExtractBits(ResultBits, TC, IsZero, *ByteOrderSwapped))
+  if (!checkExtractBits(ResultBits, std::min(TC, ResultBits.getBitWidth()),
+                        IsZero, *ByteOrderSwapped))
     return ErrBits(ResultBits, TC, *ByteOrderSwapped);
 
-  Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr;
-  return PolynomialInfo(TC, ConditionalRecurrence.Start, GenPoly, ComputedValue,
-                        *ByteOrderSwapped, LHSAux);
+  return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped,
+                        LHSAux);
 }
 
 void CRCTable::print(raw_ostream &OS) const {
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index c42985037d83a..d74a9d0aa9eaf 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -146,8 +146,28 @@ exit:                                              ; preds = %loop
 
 define i8 @crc8.le.tc16(i16 %msg, i8 %checksum) {
 ; CHECK-LABEL: 'crc8.le.tc16'
-; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Loop iterations exceed bitwidth of result
+; CHECK-NEXT:  Found little-endian CRC-8 loop with trip count 16
+; CHECK-NEXT:    Initial CRC: i8 %checksum
+; CHECK-NEXT:    Generating polynomial: 29
+; CHECK-NEXT:    Computed CRC: %crc.next = select i1 %check.sb, i8 %crc.lshr, i8 %crc.xor
+; CHECK-NEXT:    Auxiliary data: i16 %msg
+; CHECK-NEXT:    Computed CRC lookup table:
+; CHECK-NEXT:  0 9 18 27 31 22 13 4 5 12 23 30 26 19 8 1
+; CHECK-NEXT:  10 3 24 17 21 28 7 14 15 6 29 20 16 25 2 11
+; CHECK-NEXT:  20 29 6 15 11 2 25 16 17 24 3 10 14 7 28 21
+; CHECK-NEXT:  30 23 12 5 1 8 19 26 27 18 9 0 4 13 22 31
+; CHECK-NEXT:  19 26 1 8 12 5 30 23 22 31 4 13 9 0 27 18
+; CHECK-NEXT:  25 16 11 2 6 15 20 29 28 21 14 7 3 10 17 24
+; CHECK-NEXT:  7 14 21 28 24 17 10 3 2 11 16 25 29 20 15 6
+; CHECK-NEXT:  13 4 31 22 18 27 0 9 8 1 26 19 23 30 5 12
+; CHECK-NEXT:  29 20 15 6 2 11 16 25 24 17 10 3 7 14 21 28
+; CHECK-NEXT:  23 30 5 12 8 1 26 19 18 27 0 9 13 4 31 22
+; CHECK-NEXT:  9 0 27 18 22 31 4 13 12 5 30 23 19 26 1 8
+; CHECK-NEXT:  3 10 17 24 28 21 14 7 6 15 20 29 25 16 11 2
+; CHECK-NEXT:  14 7 28 21 17 24 3 10 11 2 25 16 20 29 6 15
+; CHECK-NEXT:  4 13 22 31 27 18 9 0 1 8 19 26 30 23 12 5
+; CHECK-NEXT:  26 19 8 1 5 12 23 30 31 22 13 4 0 9 18 27
+; CHECK-NEXT:  16 25 2 11 15 6 29 20 21 28 7 14 10 3 24 17
 ;
 entry:
   br label %loop
@@ -679,7 +699,7 @@ exit:                                              ; preds = %loop
 define i16 @not.crc.excess.tc(i16 %msg, i16 %checksum) {
 ; CHECK-LABEL: 'not.crc.excess.tc'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Loop iterations exceed bitwidth of result
+; CHECK-NEXT:  Reason: Loop iterations exceed bitwidth of data
 ;
 entry:
   br label %loop



More information about the llvm-commits mailing list