[llvm] [LV] Fix runtime-VF logic when generating RT-checks (PR #130118)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 07:30:56 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
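GeneratedRTChecks::create has a bug when calling addDiffRuntimeChecks: the GetVF lambda caches the RuntimeVF computed on its first call and returns that cached value on every later call, ignoring the Bits argument passed to those calls. In some cases this results in a smaller runtime VF being returned. Fix the bug by computing the runtime VF afresh on each call to the lambda, and strip the corresponding FIXME from a test.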
---
Full diff: https://github.com/llvm/llvm-project/pull/130118.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+11-15)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll (+17-10)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll (+20-12)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cb860a472d8f7..5fe6551c3f8e2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1924,21 +1924,17 @@ class GeneratedRTChecks {
"vector.memcheck");
auto DiffChecks = RtPtrChecking.getDiffChecks();
- if (DiffChecks) {
- Value *RuntimeVF = nullptr;
- MemRuntimeCheckCond = addDiffRuntimeChecks(
- MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
- [VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) {
- if (!RuntimeVF)
- RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
- return RuntimeVF;
- },
- IC);
- } else {
- MemRuntimeCheckCond = addRuntimeChecks(
- MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
- MemCheckExp, VectorizerParams::HoistRuntimeChecks);
- }
+ MemRuntimeCheckCond =
+ DiffChecks
+ ? addDiffRuntimeChecks(
+ MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
+ [VF](IRBuilderBase &B, unsigned Bits) {
+ return getRuntimeVF(B, B.getIntNTy(Bits), VF);
+ },
+ IC)
+ : addRuntimeChecks(MemCheckBlock->getTerminator(), L,
+ RtPtrChecking.getChecks(), MemCheckExp,
+ VectorizerParams::HoistRuntimeChecks);
assert(MemRuntimeCheckCond &&
"no RT checks generated although RtPtrChecking "
"claimed checks are required");
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
index feb27caf305a2..f1b54e6569afe 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll
@@ -5,7 +5,6 @@ target triple = "aarch64-unknown-linux-gnu"
; Test case where the minimum profitable trip count due to runtime checks
; exceeds VF.getKnownMinValue() * UF.
-; FIXME: The code currently incorrectly is missing a umax(VF * UF, 28).
define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr %src.1, ptr %src.2, i64 %n) {
; CHECK-LABEL: @min_trip_count_due_to_runtime_checks_1(
; CHECK-NEXT: entry:
@@ -16,7 +15,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 28, i64 [[TMP1]])
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
@@ -25,21 +24,29 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[DST_21]], [[DST_12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP18]], 16
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[DST_12]], [[SRC_13]]
-; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP22]], 16
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[DST_12]], [[SRC_25]]
-; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP10]], [[TMP13]]
; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]]
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP24]], 2
+; CHECK-NEXT: [[TMP38:%.*]] = mul i64 [[TMP26]], 16
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[DST_21]], [[SRC_13]]
-; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP12]], [[TMP11]]
+; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP12]], [[TMP38]]
; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP4]], 16
+; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2
+; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 16
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[DST_21]], [[SRC_25]]
-; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
+; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP14]], [[TMP21]]
; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
index f19e581d1c028..bd4d973046ae3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-call-intrinsics.ll
@@ -19,7 +19,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -28,9 +28,11 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
+; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
+; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
-; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
+; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
@@ -134,7 +136,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -143,9 +145,11 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
+; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
+; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
-; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
+; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
@@ -249,7 +253,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -258,9 +262,11 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
+; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
+; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
-; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
+; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
@@ -364,7 +370,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP2]])
; IF-EVL-NEXT: [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT: br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL: [[VECTOR_MEMCHECK]]:
@@ -373,9 +379,11 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4
+; IF-EVL-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP15]], 4
+; IF-EVL-NEXT: [[TMP30:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
-; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
+; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP30]]
; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
``````````
</details>
https://github.com/llvm/llvm-project/pull/130118
More information about the llvm-commits mailing list