[llvm] 1c10821 - [LoopVectorize] Fix divide-by-zero bug (#80836) (#81721)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 14 08:07:54 PST 2024
Author: David Sherwood
Date: 2024-02-14T16:07:51Z
New Revision: 1c10821022f1799452065fb57474e894e2562b7f
URL: https://github.com/llvm/llvm-project/commit/1c10821022f1799452065fb57474e894e2562b7f
DIFF: https://github.com/llvm/llvm-project/commit/1c10821022f1799452065fb57474e894e2562b7f.diff
LOG: [LoopVectorize] Fix divide-by-zero bug (#80836) (#81721)
When attempting to use the estimated trip count to refine the costs of
the runtime memory checks, we should also check for sane trip counts to
prevent divide-by-zero faults on some platforms.
Fixes #80836
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b39bf21cacdbb5..98b177cf5d2d0e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2010,16 +2010,18 @@ class GeneratedRTChecks {
BestTripCount = *EstimatedTC;
}
+ BestTripCount = std::max(BestTripCount, 1U);
InstructionCost NewMemCheckCost = MemCheckCost / BestTripCount;
// Let's ensure the cost is always at least 1.
NewMemCheckCost = std::max(*NewMemCheckCost.getValue(),
(InstructionCost::CostType)1);
- LLVM_DEBUG(dbgs()
- << "We expect runtime memory checks to be hoisted "
- << "out of the outer loop. Cost reduced from "
- << MemCheckCost << " to " << NewMemCheckCost << '\n');
+ if (BestTripCount > 1)
+ LLVM_DEBUG(dbgs()
+ << "We expect runtime memory checks to be hoisted "
+ << "out of the outer loop. Cost reduced from "
+ << MemCheckCost << " to " << NewMemCheckCost << '\n');
MemCheckCost = NewMemCheckCost;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
index 8a796bb3065b19..800c55d6740bc8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_memcheck_cost.ll
@@ -177,6 +177,43 @@ outer.exit:
}
+define void @outer_pgo_minus1(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i64 noundef %m, i64 noundef %n) {
+; CHECK-LABEL: LV: Checking a loop in 'outer_pgo_minus1'
+; CHECK: Calculating cost of runtime checks:
+; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop. Cost reduced
+; CHECK: Total cost of runtime checks: 6
+; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:16
+entry:
+ br label %outer.loop
+
+outer.loop:
+ %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
+ %mul.us = mul nsw i64 %outer.iv, %n
+ br label %inner.loop
+
+inner.loop:
+ %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
+ %add.us = add nuw nsw i64 %inner.iv, %mul.us
+ %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
+ %0 = load i8, ptr %arrayidx.us, align 1
+ %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
+ %1 = load i8, ptr %arrayidx7.us, align 1
+ %add9.us = add i8 %1, %0
+ store i8 %add9.us, ptr %arrayidx7.us, align 1
+ %inner.iv.next = add nuw nsw i64 %inner.iv, 1
+ %exitcond.not = icmp eq i64 %inner.iv.next, %n
+ br i1 %exitcond.not, label %inner.exit, label %inner.loop
+
+inner.exit:
+ %outer.iv.next = add nuw nsw i64 %outer.iv, 1
+ %exitcond26.not = icmp eq i64 %outer.iv.next, %m
+ br i1 %exitcond26.not, label %outer.exit, label %outer.loop, !prof !1
+
+outer.exit:
+ ret void
+}
+
+
define void @outer_known_tc3_full_range_checks(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_known_tc3_full_range_checks'
; CHECK: Calculating cost of runtime checks:
@@ -215,3 +252,4 @@ outer.exit:
!0 = !{!"branch_weights", i32 10, i32 20}
+!1 = !{!"branch_weights", i32 1, i32 -1}
More information about the llvm-commits
mailing list