[llvm] [LoopInterchange] Drop nuw/nsw flags from reduction ops when interchanging (PR #148612)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 05:22:13 PDT 2025
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/148612
>From 34ce36c9bb657f1cb3ea93af33f387efd497adf8 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 14 Jul 2025 00:15:42 +0000
Subject: [PATCH 1/5] [LoopInterchange] Reject interchange if non-reassociative
reduction exists
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 57 +-
.../Transforms/LoopInterchange/pr48212.ll | 2 +-
.../LoopInterchange/reductions-kind.ll | 864 ++++++++++++++++++
3 files changed, 921 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/Transforms/LoopInterchange/reductions-kind.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a5008907b9014..a2aa72e1a01f2 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -812,7 +812,62 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
// Detect floating point reduction only when it can be reordered.
if (RD.getExactFPMathInst() != nullptr)
return nullptr;
- return PHI;
+
+ RecurKind RK = RD.getRecurrenceKind();
+ switch (RK) {
+ case RecurKind::Or:
+ case RecurKind::And:
+ case RecurKind::Xor:
+ case RecurKind::SMin:
+ case RecurKind::SMax:
+ case RecurKind::UMin:
+ case RecurKind::UMax:
+ case RecurKind::FAdd:
+ case RecurKind::FMul:
+ case RecurKind::FMin:
+ case RecurKind::FMax:
+ case RecurKind::FMinimum:
+ case RecurKind::FMaximum:
+ case RecurKind::FMinimumNum:
+ case RecurKind::FMaximumNum:
+ case RecurKind::FMulAdd:
+ case RecurKind::AnyOf:
+ return PHI;
+
+ // Changing the order of integer addition/multiplication may change the
+ // semantics. Consider the following case:
+ //
+ // int A[2][2] = {{ INT_MAX, INT_MAX }, { INT_MIN, INT_MIN }};
+ // int sum = 0;
+ // for (int i = 0; i < 2; i++)
+ // for (int j = 0; j < 2; j++)
+ // sum += A[j][i];
+ //
+ // If the above loops are exchanged, the addition will cause an
+ // overflow. To prove the legality, we must ensure that all reduction
+ // operations don't have nuw/nsw flags.
+ case RecurKind::Add:
+ case RecurKind::Mul: {
+ unsigned OpCode = RecurrenceDescriptor::getOpcode(RK);
+ SmallVector<Instruction *, 4> Ops = RD.getReductionOpChain(PHI, L);
+
+ // FIXME: Is this check necessary?
+ if (Ops.empty())
+ return nullptr;
+ for (Instruction *I : Ops) {
+ // FIXME: Is this check necessary?
+ if (I->getOpcode() != OpCode)
+ return nullptr;
+
+ // Reject if the reduction operation has nuw/nsw flags.
+ if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())
+ return nullptr;
+ }
+ return PHI;
+ }
+ default:
+ return nullptr;
+ }
}
return nullptr;
}
diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll
index 936c53e217540..cb1300846cf0f 100644
--- a/llvm/test/Transforms/LoopInterchange/pr48212.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll
@@ -38,7 +38,7 @@ for.body3: ; preds = %L2, %for.inc
%idxprom4 = sext i32 %k1.03 to i64
%arrayidx5 = getelementptr inbounds [5 x i32], ptr %arrayidx, i64 0, i64 %idxprom4
%0 = load i32, ptr %arrayidx5
- %add = add nsw i32 %temp.12, %0
+ %add = add i32 %temp.12, %0
br label %for.inc
for.inc: ; preds = %for.body3
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-kind.ll b/llvm/test/Transforms/LoopInterchange/reductions-kind.ll
new file mode 100644
index 0000000000000..d9e4d58a1780e
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/reductions-kind.ll
@@ -0,0 +1,864 @@
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-output=%t -disable-output \
+; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa
+; RUN: FileCheck -input-file=%t %s
+
+; int sum = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; sum += A[j][i];
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Function: reduction_add
+define void @reduction_add(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %sum.j.next = add nsw i32 %sum.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_wrap_add
+define void @reduction_wrap_add(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %sum.j.next = add i32 %sum.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Function: reduction_cast_add
+define void @reduction_cast_add(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %sum.j.trunc = trunc i32 %sum.j to i16
+ %sum.j.ext = zext i16 %sum.j.trunc to i32
+ %sum.j.next = add nsw i32 %sum.j.ext, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int prod = 1;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; prod *= A[j][i];
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Function: reduction_mul
+define void @reduction_mul(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %prod.j.next = mul nsw i32 %prod.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_wrap_mul
+define void @reduction_wrap_mul(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %prod.j.next = mul i32 %prod.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int b_or = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; b_or |= A[j][i];
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_or
+define void @reduction_or(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %or.i = phi i32 [ 0, %entry ], [ %or.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %or.j = phi i32 [ %or.i, %for.i.header ], [ %or.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %or.j.next = or i32 %or.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %or.i.lcssa = phi i32 [ %or.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int b_and = -1;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; b_and &= A[j][i];
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_and
+define void @reduction_and(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %and.i = phi i32 [ -1, %entry ], [ %and.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %and.j = phi i32 [ %and.i, %for.i.header ], [ %and.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %and.j.next = and i32 %and.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %and.i.lcssa = phi i32 [ %and.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int b_xor = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; b_xor ^= A[j][i];
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_xor
+define void @reduction_xor(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %xor.i = phi i32 [ 0, %entry ], [ %xor.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %xor.j = phi i32 [ %xor.i, %for.i.header ], [ %xor.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %xor.j.next = xor i32 %xor.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %xor.i.lcssa = phi i32 [ %xor.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int smin = init;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; smin = (A[j][i] < smin) ? A[j][i] : smin;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_smin
+define void @reduction_smin(ptr %A, i32 %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %smin.i = phi i32 [ %init, %entry ], [ %smin.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %smin.j = phi i32 [ %smin.i, %for.i.header ], [ %smin.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %cmp = icmp slt i32 %a, %smin.j
+ %smin.j.next = select i1 %cmp, i32 %a, i32 %smin.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %smin.i.lcssa = phi i32 [ %smin.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int smax = init;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; smax = (A[j][i] > smax) ? A[j][i] : smax;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_smax
+define void @reduction_smax(ptr %A, i32 %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %smax.i = phi i32 [ %init, %entry ], [ %smax.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %smax.j = phi i32 [ %smax.i, %for.i.header ], [ %smax.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %cmp = icmp sgt i32 %a, %smax.j
+ %smax.j.next = select i1 %cmp, i32 %a, i32 %smax.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %smax.i.lcssa = phi i32 [ %smax.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; unsigned umin = init;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; umin = (A[j][i] < umin) ? A[j][i] : umin;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_umin
+define void @reduction_umin(ptr %A, i32 %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %umin.i = phi i32 [ %init, %entry ], [ %umin.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %umin.j = phi i32 [ %umin.i, %for.i.header ], [ %umin.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %cmp = icmp ult i32 %a, %umin.j
+ %umin.j.next = select i1 %cmp, i32 %a, i32 %umin.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %umin.i.lcssa = phi i32 [ %umin.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; unsigned umax = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; umax = (A[j][i] > umax) ? A[j][i] : umax;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_umax
+define void @reduction_umax(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %umax.i = phi i32 [ 0, %entry ], [ %umax.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %umax.j = phi i32 [ %umax.i, %for.i.header ], [ %umax.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %cmp = icmp ugt i32 %a, %umax.j
+ %umax.j.next = select i1 %cmp, i32 %a, i32 %umax.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %umax.i.lcssa = phi i32 [ %umax.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; int any_of = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; any_of = (A[j][i] == 42) ? 1 : any_of;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_anyof
+define void @reduction_anyof(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %anyof.i = phi i32 [ 0, %entry ], [ %anyof.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %anyof.j = phi i32 [ %anyof.i, %for.i.header ], [ %anyof.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %cmp = icmp eq i32 %a, 42
+ %anyof.j.next = select i1 %cmp, i32 1, i32 %anyof.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %anyof.i.lcssa = phi i32 [ %anyof.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; float sum = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; sum += A[j][i];
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Function: reduction_fadd
+define void @reduction_fadd(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %sum.i = phi float [ 0.0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %sum.j = phi float [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %sum.j.next = fadd float %sum.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %sum.i.lcssa = phi float [ %sum.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_reassoc_fadd
+define void @reduction_reassoc_fadd(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %sum.i = phi float [ 0.0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %sum.j = phi float [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %sum.j.next = fadd reassoc float %sum.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %sum.i.lcssa = phi float [ %sum.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; float prod = 1;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; prod *= A[j][i];
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Function: reduction_fmul
+define void @reduction_fmul(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %prod.i = phi float [ 1.0, %entry ], [ %prod.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %prod.j = phi float [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %prod.j.next = fmul float %prod.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %prod.i.lcssa = phi float [ %prod.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_reassoc_fmul
+define void @reduction_reassoc_fmul(ptr %A) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %prod.i = phi float [ 1.0, %entry ], [ %prod.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %prod.j = phi float [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %prod.j.next = fmul reassoc float %prod.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %prod.i.lcssa = phi float [ %prod.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; float fmuladd = 1;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; fmuladd += A[j][i] * B[j][i];
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedPHIOuter
+; CHECK-NEXT: Function: reduction_fmuladd
+define void @reduction_fmuladd(ptr %A, ptr %B) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %fmuladd.i = phi float [ 1.0, %entry ], [ %fmuladd.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %fmuladd.j = phi float [ %fmuladd.i, %for.i.header ], [ %fmuladd.j.next, %for.j ]
+ %idx.a = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %idx.b = getelementptr inbounds [2 x [2 x i32]], ptr %B, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx.a, align 4
+ %b = load float, ptr %idx.b, align 4
+ %fmuladd.j.next = call float @llvm.fmuladd.f32(float %a, float %b, float %fmuladd.j)
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %fmuladd.i.lcssa = phi float [ %fmuladd.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_reassoc_fmuladd
+define void @reduction_reassoc_fmuladd(ptr %A, ptr %B) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %fmuladd.i = phi float [ 1.0, %entry ], [ %fmuladd.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %fmuladd.j = phi float [ %fmuladd.i, %for.i.header ], [ %fmuladd.j.next, %for.j ]
+ %idx.a = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %idx.b = getelementptr inbounds [2 x [2 x i32]], ptr %B, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx.a, align 4
+ %b = load float, ptr %idx.b, align 4
+ %fmuladd.j.next = call reassoc float @llvm.fmuladd.f32(float %a, float %b, float %fmuladd.j)
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %fmuladd.i.lcssa = phi float [ %fmuladd.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; float fmin = init;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; fmin = (A[j][i] < fmin) ? A[j][i] : fmin;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_fmin
+define void @reduction_fmin(ptr %A, float %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %fmin.i = phi float [ %init, %entry ], [ %fmin.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %fmin.j = phi float [ %fmin.i, %for.i.header ], [ %fmin.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %cmp = fcmp nnan nsz olt float %a, %fmin.j
+ %fmin.j.next = select nnan nsz i1 %cmp, float %a, float %fmin.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %fmin.i.lcssa = phi float [ %fmin.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_fmininumnum
+define void @reduction_fmininumnum(ptr %A, float %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %fmin.i = phi float [ %init, %entry ], [ %fmin.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %fmin.j = phi float [ %fmin.i, %for.i.header ], [ %fmin.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %fmin.j.next = call float @llvm.minimumnum.f32(float %a, float %fmin.j)
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %fmin.i.lcssa = phi float [ %fmin.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; float fmax = init;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; fmax = (A[j][i] > fmax) ? A[j][i] : fmax;
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_fmax
+define void @reduction_fmax(ptr %A, float %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %fmax.i = phi float [ %init, %entry ], [ %fmax.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %fmax.j = phi float [ %fmax.i, %for.i.header ], [ %fmax.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %cmp = fcmp nnan nsz ogt float %a, %fmax.j
+ %fmax.j.next = select nnan nsz i1 %cmp, float %a, float %fmax.j
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %fmax.i.lcssa = phi float [ %fmax.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; CHECK: --- !Pass
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: reduction_fmaxinumnum
+define void @reduction_fmaxinumnum(ptr %A, float %init) {
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %fmax.i = phi float [ %init, %entry ], [ %fmax.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %fmax.j = phi float [ %fmax.i, %for.i.header ], [ %fmax.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load float, ptr %idx, align 4
+ %fmax.j.next = call float @llvm.maximumnum.f32(float %a, float %fmax.j)
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %fmax.i.lcssa = phi float [ %fmax.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
+declare float @llvm.minimumnum.f32(float %a, float %b)
+declare float @llvm.maximumnum.f32(float %a, float %b)
>From 770ccd44913a428a6a56903de009e90ecd54df3a Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 15 Jul 2025 07:38:16 +0000
Subject: [PATCH 2/5] Drop nuw/nsw flags from reductions
---
.../lib/Transforms/Scalar/LoopInterchange.cpp | 41 +++-
.../Transforms/LoopInterchange/pr48212.ll | 2 +-
...l => reductions-non-wrapped-operations.ll} | 225 ++++--------------
.../reductions-with-nowraps.ll | 144 +++++++++++
4 files changed, 223 insertions(+), 189 deletions(-)
rename llvm/test/Transforms/LoopInterchange/{reductions-kind.ll => reductions-non-wrapped-operations.ll} (79%)
create mode 100644 llvm/test/Transforms/LoopInterchange/reductions-with-nowraps.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index a2aa72e1a01f2..ba3d60980da6f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -379,6 +379,10 @@ class LoopInterchangeLegality {
return InnerLoopInductions;
}
+ ArrayRef<Instruction *> getHasNoWrapReductions() const {
+ return HasNoWrapReductions;
+ }
+
private:
bool tightlyNested(Loop *Outer, Loop *Inner);
bool containsUnsafeInstructions(BasicBlock *BB);
@@ -405,6 +409,11 @@ class LoopInterchangeLegality {
/// Set of inner loop induction PHIs
SmallVector<PHINode *, 8> InnerLoopInductions;
+
+ /// Hold instructions that have nuw/nsw flags and are involved in reductions,
+ /// like integer addition/multiplication. Those flags must be dropped when
+ /// exchanging the loops.
+ SmallVector<Instruction *, 4> HasNoWrapReductions;
};
/// Manages information utilized by the profitability check for cache. The main
@@ -473,7 +482,7 @@ class LoopInterchangeTransform {
: OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
/// Interchange OuterLoop and InnerLoop.
- bool transform();
+ bool transform(ArrayRef<Instruction *> DropNoWrapInsts);
void restructureLoops(Loop *NewInner, Loop *NewOuter,
BasicBlock *OrigInnerPreHeader,
BasicBlock *OrigOuterPreHeader);
@@ -613,7 +622,7 @@ struct LoopInterchange {
});
LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
- LIT.transform();
+ LIT.transform(LIL.getHasNoWrapReductions());
LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
LoopsInterchanged++;
@@ -798,7 +807,9 @@ static Value *followLCSSA(Value *SV) {
}
// Check V's users to see if it is involved in a reduction in L.
-static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
+static PHINode *
+findInnerReductionPhi(Loop *L, Value *V,
+ SmallVectorImpl<Instruction *> &HasNoWrapInsts) {
// Reduction variables cannot be constants.
if (isa<Constant>(V))
return nullptr;
@@ -844,8 +855,9 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
// sum += A[j][i];
//
// If the above loops are exchanged, the addition will cause an
- // overflow. To prove the legality, we must ensure that all reduction
- // operations don't have nuw/nsw flags.
+ // overflow. To prevent this, we must drop the nuw/nsw flags from the
+ // addition/multiplication instructions when we actually exchange the
+ // loops.
case RecurKind::Add:
case RecurKind::Mul: {
unsigned OpCode = RecurrenceDescriptor::getOpcode(RK);
@@ -859,12 +871,14 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
if (I->getOpcode() != OpCode)
return nullptr;
- // Reject if the reduction operation has nuw/nsw flags.
+ // If the instruction has nuw/nsw flags, we must drop them when the
+ // transformation is actually performed.
if (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())
- return nullptr;
+ HasNoWrapInsts.push_back(I);
}
return PHI;
}
+
default:
return nullptr;
}
@@ -899,7 +913,8 @@ bool LoopInterchangeLegality::findInductionAndReductions(
// Check if we have a PHI node in the outer loop that has a reduction
// result from the inner loop as an incoming value.
Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
- PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
+ PHINode *InnerRedPhi =
+ findInnerReductionPhi(InnerLoop, V, HasNoWrapReductions);
if (!InnerRedPhi ||
!llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
LLVM_DEBUG(
@@ -1485,7 +1500,8 @@ void LoopInterchangeTransform::restructureLoops(
SE->forgetLoop(NewOuter);
}
-bool LoopInterchangeTransform::transform() {
+bool LoopInterchangeTransform::transform(
+ ArrayRef<Instruction *> DropNoWrapInsts) {
bool Transformed = false;
if (InnerLoop->getSubLoops().empty()) {
@@ -1586,6 +1602,13 @@ bool LoopInterchangeTransform::transform() {
return false;
}
+ // Finally, drop the nsw/nuw flags from the instructions for reduction
+ // calculations.
+ for (Instruction *Reduction : DropNoWrapInsts) {
+ Reduction->setHasNoSignedWrap(false);
+ Reduction->setHasNoUnsignedWrap(false);
+ }
+
return true;
}
diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll
index cb1300846cf0f..936c53e217540 100644
--- a/llvm/test/Transforms/LoopInterchange/pr48212.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll
@@ -38,7 +38,7 @@ for.body3: ; preds = %L2, %for.inc
%idxprom4 = sext i32 %k1.03 to i64
%arrayidx5 = getelementptr inbounds [5 x i32], ptr %arrayidx, i64 0, i64 %idxprom4
%0 = load i32, ptr %arrayidx5
- %add = add i32 %temp.12, %0
+ %add = add nsw i32 %temp.12, %0
br label %for.inc
for.inc: ; preds = %for.body3
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-kind.ll b/llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll
similarity index 79%
rename from llvm/test/Transforms/LoopInterchange/reductions-kind.ll
rename to llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll
index d9e4d58a1780e..35ffd49666983 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-kind.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll
@@ -2,185 +2,8 @@
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa
; RUN: FileCheck -input-file=%t %s
-; int sum = 0;
-; for (int i = 0; i < 2; i++)
-; for (int j = 0; j < 2; j++)
-; sum += A[j][i];
-
-; CHECK: --- !Missed
-; CHECK-NEXT: Pass: loop-interchange
-; CHECK-NEXT: Name: UnsupportedPHIOuter
-; CHECK-NEXT: Function: reduction_add
-define void @reduction_add(ptr %A) {
-entry:
- br label %for.i.header
-
-for.i.header:
- %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
- %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
- br label %for.j
-
-for.j:
- %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
- %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
- %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
- %a = load i32, ptr %idx, align 4
- %sum.j.next = add nsw i32 %sum.j, %a
- %j.inc = add i32 %j, 1
- %cmp.j = icmp slt i32 %j.inc, 2
- br i1 %cmp.j, label %for.j, label %for.i.latch
-
-for.i.latch:
- %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
- %i.inc = add i32 %i, 1
- %cmp.i = icmp slt i32 %i.inc, 2
- br i1 %cmp.i, label %for.i.header, label %exit
-
-exit:
- ret void
-}
-
-; CHECK: --- !Pass
-; CHECK-NEXT: Pass: loop-interchange
-; CHECK-NEXT: Name: Interchanged
-; CHECK-NEXT: Function: reduction_wrap_add
-define void @reduction_wrap_add(ptr %A) {
-entry:
- br label %for.i.header
-
-for.i.header:
- %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
- %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
- br label %for.j
-
-for.j:
- %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
- %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
- %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
- %a = load i32, ptr %idx, align 4
- %sum.j.next = add i32 %sum.j, %a
- %j.inc = add i32 %j, 1
- %cmp.j = icmp slt i32 %j.inc, 2
- br i1 %cmp.j, label %for.j, label %for.i.latch
-
-for.i.latch:
- %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
- %i.inc = add i32 %i, 1
- %cmp.i = icmp slt i32 %i.inc, 2
- br i1 %cmp.i, label %for.i.header, label %exit
-
-exit:
- ret void
-}
-
-; CHECK: --- !Missed
-; CHECK-NEXT: Pass: loop-interchange
-; CHECK-NEXT: Name: UnsupportedPHIOuter
-; CHECK-NEXT: Function: reduction_cast_add
-define void @reduction_cast_add(ptr %A) {
-entry:
- br label %for.i.header
-
-for.i.header:
- %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
- %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
- br label %for.j
-
-for.j:
- %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
- %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
- %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
- %a = load i32, ptr %idx, align 4
- %sum.j.trunc = trunc i32 %sum.j to i16
- %sum.j.ext = zext i16 %sum.j.trunc to i32
- %sum.j.next = add nsw i32 %sum.j.ext, %a
- %j.inc = add i32 %j, 1
- %cmp.j = icmp slt i32 %j.inc, 2
- br i1 %cmp.j, label %for.j, label %for.i.latch
-
-for.i.latch:
- %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
- %i.inc = add i32 %i, 1
- %cmp.i = icmp slt i32 %i.inc, 2
- br i1 %cmp.i, label %for.i.header, label %exit
-
-exit:
- ret void
-}
-
-
-; int prod = 1;
-; for (int i = 0; i < 2; i++)
-; for (int j = 0; j < 2; j++)
-; prod *= A[j][i];
-
-; CHECK: --- !Missed
-; CHECK-NEXT: Pass: loop-interchange
-; CHECK-NEXT: Name: UnsupportedPHIOuter
-; CHECK-NEXT: Function: reduction_mul
-define void @reduction_mul(ptr %A) {
-entry:
- br label %for.i.header
-
-for.i.header:
- %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
- %prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
- br label %for.j
-
-for.j:
- %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
- %prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
- %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
- %a = load i32, ptr %idx, align 4
- %prod.j.next = mul nsw i32 %prod.j, %a
- %j.inc = add i32 %j, 1
- %cmp.j = icmp slt i32 %j.inc, 2
- br i1 %cmp.j, label %for.j, label %for.i.latch
-
-for.i.latch:
- %prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
- %i.inc = add i32 %i, 1
- %cmp.i = icmp slt i32 %i.inc, 2
- br i1 %cmp.i, label %for.i.header, label %exit
-
-exit:
- ret void
-}
-
-; CHECK: --- !Pass
-; CHECK-NEXT: Pass: loop-interchange
-; CHECK-NEXT: Name: Interchanged
-; CHECK-NEXT: Function: reduction_wrap_mul
-define void @reduction_wrap_mul(ptr %A) {
-entry:
- br label %for.i.header
-
-for.i.header:
- %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
- %prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
- br label %for.j
-
-for.j:
- %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
- %prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
- %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
- %a = load i32, ptr %idx, align 4
- %prod.j.next = mul i32 %prod.j, %a
- %j.inc = add i32 %j, 1
- %cmp.j = icmp slt i32 %j.inc, 2
- br i1 %cmp.j, label %for.j, label %for.i.latch
-
-for.i.latch:
- %prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
- %i.inc = add i32 %i, 1
- %cmp.i = icmp slt i32 %i.inc, 2
- br i1 %cmp.i, label %for.i.header, label %exit
-
-exit:
- ret void
-}
-
-
+; Check that exchanging the loops is legal for the bitwise-or reduction.
+;
; int b_or = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -220,6 +43,8 @@ exit:
}
+; Check that exchanging the loops is legal for the bitwise-and reduction.
+;
; int b_and = -1;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -259,6 +84,8 @@ exit:
}
+; Check that exchanging the loops is legal for the bitwise-xor reduction.
+;
; int b_xor = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -298,6 +125,8 @@ exit:
}
+; Check that exchanging the loops is legal for the signed-minimum reduction.
+;
; int smin = init;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -338,6 +167,8 @@ exit:
}
+; Check that exchanging the loops is legal for the signed-maximum reduction.
+;
; int smax = init;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -378,6 +209,8 @@ exit:
}
+; Check that exchanging the loops is legal for the unsigned-minimum reduction.
+;
; unsigned umin = init;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -418,6 +251,8 @@ exit:
}
+; Check that exchanging the loops is legal for the unsigned-maximum reduction.
+;
; unsigned umax = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -458,6 +293,8 @@ exit:
}
+; Check that exchanging the loops is legal for the any-of reduction.
+;
; int any_of = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -497,6 +334,9 @@ exit:
ret void
}
+; Check that the loops aren't exchanged if there is a reduction of
+; non-reassociative floating-point addition.
+;
; float sum = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -535,6 +375,9 @@ exit:
ret void
}
+; Check that the interchange is legal if the floation-point addition is marked
+; as reassoc.
+;
; CHECK: --- !Pass
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
@@ -568,6 +411,9 @@ exit:
ret void
}
+; Check that the loops aren't exchanged if there is a reduction of
+; non-reassociative floating-point multiplication.
+;
; float prod = 1;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -606,6 +452,9 @@ exit:
ret void
}
+; Check that the interchange is legal if the floation-point multiplication is
+; marked as reassoc.
+;
; CHECK: --- !Pass
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
@@ -639,6 +488,9 @@ exit:
ret void
}
+; Check that the loops aren't exchanged if there is a reduction of
+; non-reassociative floating-point fmuladd.
+;
; float fmuladd = 0;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -679,6 +531,9 @@ exit:
ret void
}
+; Check that the interchange is legal if the floation-point fmuladd is marked
+; as reassoc.
+;
; CHECK: --- !Pass
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
@@ -714,6 +569,9 @@ exit:
ret void
}
+; Check that exchanging the loops is legal for the reassociative floating-point
+; minimum.
+;
; float fmin = init;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -754,6 +612,9 @@ exit:
}
+; Check that exchanging the loops is legal for the floation-point
+; llvm.minimumnum.
+;
; CHECK: --- !Pass
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
@@ -787,6 +648,9 @@ exit:
ret void
}
+; Check that exchanging the loops is legal for the reassociative floating-point
+; maximum.
+;
; float fmax = init;
; for (int i = 0; i < 2; i++)
; for (int j = 0; j < 2; j++)
@@ -826,6 +690,9 @@ exit:
ret void
}
+; Check that exchanging the loops is legal for the floation-point
+; llvm.maximumnum.
+
; CHECK: --- !Pass
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-with-nowraps.ll b/llvm/test/Transforms/LoopInterchange/reductions-with-nowraps.ll
new file mode 100644
index 0000000000000..5c05f963a2f3e
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/reductions-with-nowraps.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-interchange -cache-line-size=64 -S < %s | FileCheck %s
+
+; Check that nsw/nuw flags are dropped when interchanging loops.
+;
+; int sum = 0;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; sum += A[j][i];
+;
+define void @reduction_add(ptr %A) {
+; CHECK-LABEL: define void @reduction_add(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
+; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
+; CHECK: [[FOR_I_HEADER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[FOR_I_LATCH:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[SUM_J:%.*]] = phi i32 [ [[SUM_J_NEXT:%.*]], %[[FOR_I_LATCH]] ], [ [[SUM_I:%.*]], %[[FOR_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_J_SPLIT1:.*]]
+; CHECK: [[FOR_J_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_J:.*]]
+; CHECK: [[FOR_J]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
+; CHECK-NEXT: [[SUM_I]] = phi i32 [ [[SUM_I_LCSSA:%.*]], %[[FOR_J_SPLIT]] ], [ 0, %[[FOR_J_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
+; CHECK: [[FOR_J_SPLIT1]]:
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[A]], i32 0, i32 [[J]], i32 [[I]]
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[IDX]], align 4
+; CHECK-NEXT: [[SUM_J_NEXT]] = add i32 [[SUM_J]], [[A]]
+; CHECK-NEXT: [[J_INC:%.*]] = add i32 [[J]], 1
+; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i32 [[J_INC]], 2
+; CHECK-NEXT: br label %[[FOR_I_LATCH]]
+; CHECK: [[FOR_J_SPLIT]]:
+; CHECK-NEXT: [[SUM_I_LCSSA]] = phi i32 [ [[SUM_J_NEXT]], %[[FOR_I_LATCH]] ]
+; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 2
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[EXIT:.*]]
+; CHECK: [[FOR_I_LATCH]]:
+; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[I_INC]], 2
+; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_I_HEADER]], label %[[FOR_J_SPLIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %sum.i = phi i32 [ 0, %entry ], [ %sum.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %sum.j = phi i32 [ %sum.i, %for.i.header ], [ %sum.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %sum.j.next = add nuw nsw i32 %sum.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %sum.i.lcssa = phi i32 [ %sum.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
+
+; Check that nsw/nuw flags are dropped when interchanging loops.
+;
+; int prod = 1;
+; for (int i = 0; i < 2; i++)
+; for (int j = 0; j < 2; j++)
+; prod *= A[j][i];
+;
+define void @reduction_mul(ptr %A) {
+; CHECK-LABEL: define void @reduction_mul(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[FOR_J_PREHEADER:.*]]
+; CHECK: [[FOR_I_HEADER_PREHEADER:.*]]:
+; CHECK-NEXT: br label %[[FOR_I_HEADER:.*]]
+; CHECK: [[FOR_I_HEADER]]:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], %[[FOR_I_LATCH:.*]] ], [ 0, %[[FOR_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: [[PROD_J:%.*]] = phi i32 [ [[PROD_J_NEXT:%.*]], %[[FOR_I_LATCH]] ], [ [[PROD_I:%.*]], %[[FOR_I_HEADER_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_J_SPLIT1:.*]]
+; CHECK: [[FOR_J_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_J:.*]]
+; CHECK: [[FOR_J]]:
+; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[TMP0:%.*]], %[[FOR_J_SPLIT:.*]] ], [ 0, %[[FOR_J_PREHEADER]] ]
+; CHECK-NEXT: [[PROD_I]] = phi i32 [ [[PROD_I_LCSSA:%.*]], %[[FOR_J_SPLIT]] ], [ 1, %[[FOR_J_PREHEADER]] ]
+; CHECK-NEXT: br label %[[FOR_I_HEADER_PREHEADER]]
+; CHECK: [[FOR_J_SPLIT1]]:
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[A]], i32 0, i32 [[J]], i32 [[I]]
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[IDX]], align 4
+; CHECK-NEXT: [[PROD_J_NEXT]] = mul i32 [[PROD_J]], [[A]]
+; CHECK-NEXT: [[J_INC:%.*]] = add i32 [[J]], 1
+; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i32 [[J_INC]], 2
+; CHECK-NEXT: br label %[[FOR_I_LATCH]]
+; CHECK: [[FOR_J_SPLIT]]:
+; CHECK-NEXT: [[PROD_I_LCSSA]] = phi i32 [ [[PROD_J_NEXT]], %[[FOR_I_LATCH]] ]
+; CHECK-NEXT: [[TMP0]] = add i32 [[J]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 2
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_J]], label %[[EXIT:.*]]
+; CHECK: [[FOR_I_LATCH]]:
+; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[I_INC]], 2
+; CHECK-NEXT: br i1 [[CMP_I]], label %[[FOR_I_HEADER]], label %[[FOR_J_SPLIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.i.header
+
+for.i.header:
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
+ %prod.i = phi i32 [ 1, %entry ], [ %prod.i.lcssa, %for.i.latch ]
+ br label %for.j
+
+for.j:
+ %j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
+ %prod.j = phi i32 [ %prod.i, %for.i.header ], [ %prod.j.next, %for.j ]
+ %idx = getelementptr inbounds [2 x [2 x i32]], ptr %A, i32 0, i32 %j, i32 %i
+ %a = load i32, ptr %idx, align 4
+ %prod.j.next = mul nsw nuw i32 %prod.j, %a
+ %j.inc = add i32 %j, 1
+ %cmp.j = icmp slt i32 %j.inc, 2
+ br i1 %cmp.j, label %for.j, label %for.i.latch
+
+for.i.latch:
+ %prod.i.lcssa = phi i32 [ %prod.j.next, %for.j ]
+ %i.inc = add i32 %i, 1
+ %cmp.i = icmp slt i32 %i.inc, 2
+ br i1 %cmp.i, label %for.i.header, label %exit
+
+exit:
+ ret void
+}
>From 00bc619fb0ea7a4ba75d385e1ab52c45de4b538f Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 15 Jul 2025 07:41:40 +0000
Subject: [PATCH 3/5] Address FIXME comments
---
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index ba3d60980da6f..61da144bdd270 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -863,13 +863,13 @@ findInnerReductionPhi(Loop *L, Value *V,
unsigned OpCode = RecurrenceDescriptor::getOpcode(RK);
SmallVector<Instruction *, 4> Ops = RD.getReductionOpChain(PHI, L);
- // FIXME: Is this check necessary?
+ // Bail out when we fail to collect the reduction instruction chain.
if (Ops.empty())
return nullptr;
+
for (Instruction *I : Ops) {
- // FIXME: Is this check necessary?
- if (I->getOpcode() != OpCode)
- return nullptr;
+ assert(I->getOpcode() == OpCode &&
+ "Expected the instruction to be the reduction operation");
// If the instruction has nuw/nsw flags, we must drop them when the
// transformation is actually performed.
>From ae7f398a0c1a2108455b77935b2bd6dc164ffa02 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 15 Jul 2025 09:23:54 +0000
Subject: [PATCH 4/5] Fix typo
---
.../reductions-non-wrapped-operations.ll | 34 +++++++++----------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll b/llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll
index 35ffd49666983..3ed69485bc8f2 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-non-wrapped-operations.ll
@@ -2,7 +2,7 @@
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa
; RUN: FileCheck -input-file=%t %s
-; Check that exchanging the loops is legal for the bitwise-or reduction.
+; Check that interchanging the loops is legal for the bitwise-or reduction.
;
; int b_or = 0;
; for (int i = 0; i < 2; i++)
@@ -43,7 +43,7 @@ exit:
}
-; Check that exchanging the loops is legal for the bitwise-and reduction.
+; Check that interchanging the loops is legal for the bitwise-and reduction.
;
; int b_and = -1;
; for (int i = 0; i < 2; i++)
@@ -84,7 +84,7 @@ exit:
}
-; Check that exchanging the loops is legal for the bitwise-xor reduction.
+; Check that interchanging the loops is legal for the bitwise-xor reduction.
;
; int b_xor = 0;
; for (int i = 0; i < 2; i++)
@@ -125,7 +125,7 @@ exit:
}
-; Check that exchanging the loops is legal for the signed-minimum reduction.
+; Check that interchanging the loops is legal for the signed-minimum reduction.
;
; int smin = init;
; for (int i = 0; i < 2; i++)
@@ -167,7 +167,7 @@ exit:
}
-; Check that exchanging the loops is legal for the signed-maximum reduction.
+; Check that interchanging the loops is legal for the signed-maximum reduction.
;
; int smax = init;
; for (int i = 0; i < 2; i++)
@@ -209,7 +209,7 @@ exit:
}
-; Check that exchanging the loops is legal for the unsigned-minimum reduction.
+; Check that interchanging the loops is legal for the unsigned-minimum reduction.
;
; unsigned umin = init;
; for (int i = 0; i < 2; i++)
@@ -251,7 +251,7 @@ exit:
}
-; Check that exchanging the loops is legal for the unsigned-maximum reduction.
+; Check that interchanging the loops is legal for the unsigned-maximum reduction.
;
; unsigned umax = 0;
; for (int i = 0; i < 2; i++)
@@ -293,7 +293,7 @@ exit:
}
-; Check that exchanging the loops is legal for the any-of reduction.
+; Check that interchanging the loops is legal for the any-of reduction.
;
; int any_of = 0;
; for (int i = 0; i < 2; i++)
@@ -375,7 +375,7 @@ exit:
ret void
}
-; Check that the interchange is legal if the floation-point addition is marked
+; Check that the interchange is legal if the floating-point addition is marked
; as reassoc.
;
; CHECK: --- !Pass
@@ -452,7 +452,7 @@ exit:
ret void
}
-; Check that the interchange is legal if the floation-point multiplication is
+; Check that the interchange is legal if the floating-point multiplication is
; marked as reassoc.
;
; CHECK: --- !Pass
@@ -531,7 +531,7 @@ exit:
ret void
}
-; Check that the interchange is legal if the floation-point fmuladd is marked
+; Check that the interchange is legal if the floating-point fmuladd is marked
; as reassoc.
;
; CHECK: --- !Pass
@@ -569,8 +569,8 @@ exit:
ret void
}
-; Check that exchanging the loops is legal for the reassociative floating-point
-; minimum.
+; Check that interchanging the loops is legal for the reassociative
+; floating-point minimum.
;
; float fmin = init;
; for (int i = 0; i < 2; i++)
@@ -612,7 +612,7 @@ exit:
}
-; Check that exchanging the loops is legal for the floation-point
+; Check that interchanging the loops is legal for the floating-point
; llvm.minimumnum.
;
; CHECK: --- !Pass
@@ -648,8 +648,8 @@ exit:
ret void
}
-; Check that exchanging the loops is legal for the reassociative floating-point
-; maximum.
+; Check that interchanging the loops is legal for the reassociative
+; floating-point maximum.
;
; float fmax = init;
; for (int i = 0; i < 2; i++)
@@ -690,7 +690,7 @@ exit:
ret void
}
-; Check that exchanging the loops is legal for the floation-point
+; Check that interchanging the loops is legal for the floating-point
; llvm.maximumnum.
; CHECK: --- !Pass
>From d8bbcc3179fc0e5dcf26e8eac9eda26e2b500434 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 15 Jul 2025 21:21:48 +0900
Subject: [PATCH 5/5] exchanging -> interchanging
---
llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 61da144bdd270..09ebd2c913c0e 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -412,7 +412,7 @@ class LoopInterchangeLegality {
/// Hold instructions that have nuw/nsw flags and involved in reductions,
/// like integer addition/multiplication. Those flags must be dropped when
- /// exchanging the loops.
+ /// interchanging the loops.
SmallVector<Instruction *, 4> HasNoWrapReductions;
};
More information about the llvm-commits
mailing list