[llvm-branch-commits] [llvm] [LoopInterchange] Drop ninf from instructions involved in interchange (PR #197923)

Fri May 15 05:55:35 PDT 2026

https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/197923

None

>From 8b6731a2fe062a5b742dfd71956f671345c9a8eb Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 15 May 2026 12:53:52 +0000
Subject: [PATCH] [LoopInterchange] Drop ninf from instructions involved in
 interchange

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 43 ++++++++++++++-----
 llvm/test/Transforms/LoopInterchange/ninf.ll  |  2 +-
 .../LoopInterchange/reduction2mem.ll          |  2 +-
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 91e2510c33851..cd26b47b852ed 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -481,6 +481,8 @@ class LoopInterchangeLegality {
     return HasNoWrapReductions;
   }
 
+  ArrayRef<Instruction *> getHasNoInfInsts() const { return HasNoInfInsts; }
+
   /// Record reductions in the inner loop. Currently supported reductions:
   /// - initialized from a constant.
   /// - reduction PHI node has only one user.
@@ -549,6 +551,10 @@ class LoopInterchangeLegality {
   /// interchanging the loops.
   SmallVector<Instruction *, 4> HasNoWrapReductions;
 
+  /// Hold instructions that have ninf flags and involved in reductions. Those
+  /// flags must be dropped when interchanging the loops.
+  SmallVector<Instruction *, 4> HasNoInfInsts;
+
   /// Vector of reductions in the inner loop.
   SmallVector<InnerReduction, 8> InnerReductions;
 };
@@ -619,7 +625,8 @@ class LoopInterchangeTransform {
       : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
 
   /// Interchange OuterLoop and InnerLoop.
-  bool transform(ArrayRef<Instruction *> DropNoWrapInsts);
+  bool transform(ArrayRef<Instruction *> DropNoWrapInsts,
+                 ArrayRef<Instruction *> DropNoInfInsts);
   void reduction2Memory();
   void restructureLoops(Loop *NewInner, Loop *NewOuter,
                         BasicBlock *OrigInnerPreHeader,
@@ -773,7 +780,7 @@ struct LoopInterchange {
     });
 
     LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
-    LIT.transform(LIL.getHasNoWrapReductions());
+    LIT.transform(LIL.getHasNoWrapReductions(), LIL.getHasNoInfInsts());
     LLVM_DEBUG(dbgs() << "Loops interchanged: outer loop '"
                       << OuterLoop->getName() << "' and inner loop '"
                       << InnerLoop->getName() << "'\n");
@@ -970,7 +977,8 @@ static Value *followLCSSA(Value *SV) {
 }
 
 static bool checkReductionKind(Loop *L, PHINode *PHI,
-                               SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
+                               SmallVectorImpl<Instruction *> &HasNoWrapInsts,
+                               SmallVectorImpl<Instruction *> &HasNoInfInsts) {
   RecurrenceDescriptor RD;
   if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
     // Detect floating point reduction only when it can be reordered.
@@ -986,6 +994,13 @@ static bool checkReductionKind(Loop *L, PHINode *PHI,
     case RecurKind::SMax:
     case RecurKind::UMin:
     case RecurKind::UMax:
+    case RecurKind::AnyOf:
+      return true;
+
+    // Change the order of floating-point operations may alter the results. If a
+    // certain instruction has ninf flag, it means that reordering can produce a
+    // poison value, which may lead to undefined behavior. To prevent this, we
+    // must drop the ninf flags if we decide to apply the transformation.
     case RecurKind::FAdd:
     case RecurKind::FMul:
     case RecurKind::FMin:
@@ -995,7 +1010,9 @@ static bool checkReductionKind(Loop *L, PHINode *PHI,
     case RecurKind::FMinimumNum:
     case RecurKind::FMaximumNum:
     case RecurKind::FMulAdd:
-    case RecurKind::AnyOf:
+      for (Instruction *I : RD.getReductionOpChain(PHI, L))
+        if (isa<FPMathOperator>(I) && I->hasNoInfs())
+          HasNoInfInsts.push_back(I);
       return true;
 
     // Change the order of integer addition/multiplication may change the
@@ -1043,7 +1060,8 @@ static bool checkReductionKind(Loop *L, PHINode *PHI,
 // Check V's users to see if it is involved in a reduction in L.
 static PHINode *
 findInnerReductionPhi(Loop *L, Value *V,
-                      SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
+                      SmallVectorImpl<Instruction *> &HasNoWrapInsts,
+                      SmallVectorImpl<Instruction *> &HasNoInfInsts) {
   // Reduction variables cannot be constants.
   if (isa<Constant>(V))
     return nullptr;
@@ -1053,7 +1071,7 @@ findInnerReductionPhi(Loop *L, Value *V,
       if (PHI->getNumIncomingValues() == 1)
         continue;
 
-      if (checkReductionKind(L, PHI, HasNoWrapInsts))
+      if (checkReductionKind(L, PHI, HasNoWrapInsts, HasNoInfInsts))
         return PHI;
       else
         return nullptr;
@@ -1110,7 +1128,7 @@ bool LoopInterchangeLegality::isInnerReduction(
     return false;
 
   // Check the reduction kind.
-  if (!checkReductionKind(L, Phi, HasNoWrapInsts))
+  if (!checkReductionKind(L, Phi, HasNoWrapInsts, HasNoInfInsts))
     return false;
 
   // Find lcssa_phi in OuterLoop's Latch
@@ -1213,8 +1231,8 @@ bool LoopInterchangeLegality::findInductionAndReductions(
         // Check if we have a PHI node in the outer loop that has a reduction
         // result from the inner loop as an incoming value.
         Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
-        PHINode *InnerRedPhi =
-            findInnerReductionPhi(InnerLoop, V, HasNoWrapReductions);
+        PHINode *InnerRedPhi = findInnerReductionPhi(
+            InnerLoop, V, HasNoWrapReductions, HasNoInfInsts);
         if (!InnerRedPhi ||
             !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
           LLVM_DEBUG(
@@ -1926,7 +1944,8 @@ void LoopInterchangeTransform::reduction2Memory() {
 }
 
 bool LoopInterchangeTransform::transform(
-    ArrayRef<Instruction *> DropNoWrapInsts) {
+    ArrayRef<Instruction *> DropNoWrapInsts,
+    ArrayRef<Instruction *> DropNoInfInsts) {
   bool Transformed = false;
 
   ArrayRef<LoopInterchangeLegality::InnerReduction> InnerReductions =
@@ -2032,12 +2051,14 @@ bool LoopInterchangeTransform::transform(
     return false;
   }
 
-  // Finally, drop the nsw/nuw flags from the instructions for reduction
+  // Finally, drop the nsw/nuw/ninf flags from the instructions for reduction
   // calculations.
   for (Instruction *Reduction : DropNoWrapInsts) {
     Reduction->setHasNoSignedWrap(false);
     Reduction->setHasNoUnsignedWrap(false);
   }
+  for (Instruction *I : DropNoInfInsts)
+    I->setHasNoInfs(false);
 
   return true;
 }
diff --git a/llvm/test/Transforms/LoopInterchange/ninf.ll b/llvm/test/Transforms/LoopInterchange/ninf.ll
index 23cd7610a5b30..da90d6d526fef 100644
--- a/llvm/test/Transforms/LoopInterchange/ninf.ll
+++ b/llvm/test/Transforms/LoopInterchange/ninf.ll
@@ -34,7 +34,7 @@ define noundef float @reduction_reassoc_ninf(ptr %A) {
 ; CHECK:       [[FOR_J_SPLIT1]]:
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr [2 x float], ptr [[A]], i64 [[I]], i64 [[J]]
 ; CHECK-NEXT:    [[VAL:%.*]] = load float, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[SUM_J_NEXT]] = fadd reassoc ninf float [[SUM_J]], [[VAL]]
+; CHECK-NEXT:    [[SUM_J_NEXT]] = fadd reassoc float [[SUM_J]], [[VAL]]
 ; CHECK-NEXT:    [[J_INC:%.*]] = add i64 [[J]], 1
 ; CHECK-NEXT:    [[EC_J:%.*]] = icmp eq i64 [[J_INC]], 2
 ; CHECK-NEXT:    br label %[[FOR_I_LATCH]]
diff --git a/llvm/test/Transforms/LoopInterchange/reduction2mem.ll b/llvm/test/Transforms/LoopInterchange/reduction2mem.ll
index 1293c22b5a7a0..5618cb6c93e3f 100644
--- a/llvm/test/Transforms/LoopInterchange/reduction2mem.ll
+++ b/llvm/test/Transforms/LoopInterchange/reduction2mem.ll
@@ -37,7 +37,7 @@ define void @func(ptr noalias readonly %a, ptr noalias readonly %b, ptr noalias
 ; CHECK-NEXT:    [[ADDR_B_J_I:%.*]] = getelementptr inbounds nuw [100 x double], ptr [[ADDR_B]], i64 [[INDEX_J]]
 ; CHECK-NEXT:    [[B_J_I:%.*]] = load double, ptr [[ADDR_B_J_I]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[B_J_I]], [[A_J_I]]
-; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[MUL]], [[NEW_VAR]]
+; CHECK-NEXT:    [[ADD:%.*]] = fadd reassoc nnan nsz arcp contract afn double [[MUL]], [[NEW_VAR]]
 ; CHECK-NEXT:    store double [[ADD]], ptr [[ADDR_S]], align 8
 ; CHECK-NEXT:    [[DEAD_J_NEXT:%.*]] = add nuw nsw i64 [[INDEX_J]], 1
 ; CHECK-NEXT:    [[COND1:%.*]] = icmp eq i64 [[DEAD_J_NEXT]], 100