[llvm] 2f8b486 - [IR][JumpThreading] Fix infinite recursion on compare self-reference (#129501)

Mon Apr 7 01:01:39 PDT 2025

Author: Robert Imschweiler
Date: 2025-04-07T10:01:36+02:00
New Revision: 2f8b486f979f4b89929a447f516fd1da9a659834

URL: https://github.com/llvm/llvm-project/commit/2f8b486f979f4b89929a447f516fd1da9a659834
DIFF: https://github.com/llvm/llvm-project/commit/2f8b486f979f4b89929a447f516fd1da9a659834.diff

LOG: [IR][JumpThreading] Fix infinite recursion on compare self-reference (#129501)

In unreachable code, constant PHI nodes may appear and be replaced by their
single value. As a result, instructions may become self-referencing. This
commit adds checks to avoid going into infinite recursion when handling
self-referencing compare instructions in `evaluateOnPredecessorEdge()`.

This LLVM defect was identified via the AMD Fuzzing project.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Scalar/JumpThreading.h
    llvm/lib/Transforms/Scalar/JumpThreading.cpp
    llvm/test/Transforms/JumpThreading/unreachable-loops.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 84292c716a0a9..182cab02e640c 100644

--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -208,6 +208,11 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
   ///   if 'HasProfile' is true creates new instance through
   ///   FunctionAnalysisManager, otherwise nullptr.
   BlockFrequencyInfo *getOrCreateBFI(bool Force = false);
+
+  // Internal overload of evaluateOnPredecessorEdge().
+  Constant *evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB,
+                                      Value *cond, const DataLayout &DL,
+                                      SmallPtrSet<Value *, 8> &Visited);
 };
 
 } // end namespace llvm

diff  --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 18d5f201413c8..3548412001ac6 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -1494,6 +1495,17 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
                                                        BasicBlock *PredPredBB,
                                                        Value *V,
                                                        const DataLayout &DL) {
+  SmallPtrSet<Value *, 8> Visited;
+  return evaluateOnPredecessorEdge(BB, PredPredBB, V, DL, Visited);
+}
+
+Constant *JumpThreadingPass::evaluateOnPredecessorEdge(
+    BasicBlock *BB, BasicBlock *PredPredBB, Value *V, const DataLayout &DL,
+    SmallPtrSet<Value *, 8> &Visited) {
+  if (!Visited.insert(V).second)
+    return nullptr;
+  auto _ = make_scope_exit([&Visited, V]() { Visited.erase(V); });
+
   BasicBlock *PredBB = BB->getSinglePredecessor();
   assert(PredBB && "Expected a single predecessor");
 
@@ -1515,12 +1527,16 @@ Constant *JumpThreadingPass::evaluateOnPredecessorEdge(BasicBlock *BB,
   }
 
   // If we have a CmpInst, try to fold it for each incoming edge into PredBB.
+  // Note that during the execution of the pass, phi nodes may become constant
+  // and may be removed, which can lead to self-referencing instructions in
+  // code that becomes unreachable. Consequently, we need to handle those
+  // instructions in unreachable code and check before going into recursion.
   if (CmpInst *CondCmp = dyn_cast<CmpInst>(V)) {
     if (CondCmp->getParent() == BB) {
-      Constant *Op0 =
-          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(0), DL);
-      Constant *Op1 =
-          evaluateOnPredecessorEdge(BB, PredPredBB, CondCmp->getOperand(1), DL);
+      Constant *Op0 = evaluateOnPredecessorEdge(
+          BB, PredPredBB, CondCmp->getOperand(0), DL, Visited);
+      Constant *Op1 = evaluateOnPredecessorEdge(
+          BB, PredPredBB, CondCmp->getOperand(1), DL, Visited);
       if (Op0 && Op1) {
         return ConstantFoldCompareInstOperands(CondCmp->getPredicate(), Op0,
                                                Op1, DL);

diff  --git a/llvm/test/Transforms/JumpThreading/unreachable-loops.ll b/llvm/test/Transforms/JumpThreading/unreachable-loops.ll
index d8bd3f389aae8..79c5e9217312d 100644
--- a/llvm/test/Transforms/JumpThreading/unreachable-loops.ll
+++ b/llvm/test/Transforms/JumpThreading/unreachable-loops.ll
@@ -180,4 +180,121 @@ cleanup2343.loopexit4:                            ; preds = %cleanup1491
   unreachable
 }
 
+; This used to segfault due to infinite recursion on %C4. Reason: %L6 is
+; identified as a "partially redundant load" and is replaced by a PHI node. The
+; PHI node is then simplified to a constant and removed, so %L6 is replaced by
+; %C4, which makes %C4 self-referencing since it originally used %L6.
+; The test case has been generated by the AMD Fuzzing project and simplified
+; manually and by llvm-reduce.
+
+define i32 @constant_phi_leads_to_self_reference(ptr %ptr) {
+; CHECK-LABEL: @constant_phi_leads_to_self_reference(
+; CHECK-NEXT:    [[A9:%.*]] = alloca i1, align 1
+; CHECK-NEXT:    br label [[F6:%.*]]
+; CHECK:       T3:
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       BB5:
+; CHECK-NEXT:    [[L10:%.*]] = load i1, ptr [[A9]], align 1
+; CHECK-NEXT:    br i1 [[L10]], label [[BB6:%.*]], label [[F6]]
+; CHECK:       BB6:
+; CHECK-NEXT:    [[LGV3:%.*]] = load i1, ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[C4:%.*]] = icmp sle i1 [[C4]], true
+; CHECK-NEXT:    store i1 [[C4]], ptr [[PTR]], align 1
+; CHECK-NEXT:    br i1 [[C4]], label [[F6]], label [[T3:%.*]]
+; CHECK:       F6:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       F7:
+; CHECK-NEXT:    br label [[BB5]]
+;
+  %A9 = alloca i1, align 1
+  br i1 false, label %BB4, label %F6
+
+BB4:                                              ; preds = %0
+  br i1 false, label %F6, label %F1
+
+F1:                                               ; preds = %BB4
+  br i1 false, label %T4, label %T3
+
+T3:                                               ; preds = %T4, %BB6, %F1
+  %L6 = load i1, ptr %ptr, align 1
+  br label %BB5
+
+BB5:                                              ; preds = %F7, %T3
+  %L10 = load i1, ptr %A9, align 1
+  br i1 %L10, label %BB6, label %F6
+
+BB6:                                              ; preds = %BB5
+  %LGV3 = load i1, ptr %ptr, align 1
+  %C4 = icmp sle i1 %L6, true
+  store i1 %C4, ptr %ptr, align 1
+  br i1 %L6, label %F6, label %T3
+
+T4:                                               ; preds = %F1
+  br label %T3
+
+F6:                                               ; preds = %BB6, %BB5, %BB4, %0
+  ret i32 0
+
+F7:                                               ; No predecessors!
+  br label %BB5
+}
+
+; Same as above, but with multiple icmps referencing the same PHI node.
+
+define i32 @recursive_icmp_mult(ptr %ptr) {
+; CHECK-LABEL: @recursive_icmp_mult(
+; CHECK-NEXT:    [[A9:%.*]] = alloca i1, align 1
+; CHECK-NEXT:    br label [[F6:%.*]]
+; CHECK:       T3:
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       BB5:
+; CHECK-NEXT:    [[L10:%.*]] = load i1, ptr [[A9]], align 1
+; CHECK-NEXT:    br i1 [[L10]], label [[BB6:%.*]], label [[F6]]
+; CHECK:       BB6:
+; CHECK-NEXT:    [[LGV3:%.*]] = load i1, ptr [[PTR:%.*]], align 1
+; CHECK-NEXT:    [[C4:%.*]] = icmp sle i1 [[C6:%.*]], true
+; CHECK-NEXT:    [[C5:%.*]] = icmp sle i1 [[C6]], false
+; CHECK-NEXT:    [[C6]] = icmp sle i1 [[C4]], [[C5]]
+; CHECK-NEXT:    store i1 [[C6]], ptr [[PTR]], align 1
+; CHECK-NEXT:    br i1 [[C6]], label [[F6]], label [[T3:%.*]]
+; CHECK:       F6:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       F7:
+; CHECK-NEXT:    br label [[BB5]]
+;
+  %A9 = alloca i1, align 1
+  br i1 false, label %BB4, label %F6
+
+BB4:                                              ; preds = %0
+  br i1 false, label %F6, label %F1
+
+F1:                                               ; preds = %BB4
+  br i1 false, label %T4, label %T3
+
+T3:                                               ; preds = %T4, %BB6, %F1
+  %L6 = load i1, ptr %ptr, align 1
+  br label %BB5
+
+BB5:                                              ; preds = %F7, %T3
+  %L10 = load i1, ptr %A9, align 1
+  br i1 %L10, label %BB6, label %F6
+
+BB6:                                              ; preds = %BB5
+  %LGV3 = load i1, ptr %ptr, align 1
+  %C4 = icmp sle i1 %L6, true
+  %C5 = icmp sle i1 %L6, false
+  %C6 = icmp sle i1 %C4, %C5
+  store i1 %C6, ptr %ptr, align 1
+  br i1 %L6, label %F6, label %T3
+
+T4:                                               ; preds = %F1
+  br label %T3
+
+F6:                                               ; preds = %BB6, %BB5, %BB4, %0
+  ret i32 0
+
+F7:                                               ; No predecessors!
+  br label %BB5
+}
+
 !0 = !{!"branch_weights", i32 2146410443, i32 1073205}