[llvm] [LV] Disable fold tail by masking - when induction vars used outside (PR #81609)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 13 06:46:29 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Niwin Anto (niwinanto)
<details>
<summary>Changes</summary>
When induction variables are used outside the loop body, tail folding by masking miscompiles the loop.
https://github.com/llvm/llvm-project/issues/76069
---
Full diff: https://github.com/llvm/llvm-project/pull/81609.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+13)
- (added) llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll (+85)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 37a356c43e29a4..d33743e74cbe31 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1552,6 +1552,19 @@ bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
}
}
+ for (const auto &Entry : getInductionVars()) {
+ PHINode *OrigPhi = Entry.first;
+ for (User *U : OrigPhi->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (!TheLoop->contains(UI)) {
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
+ "outside user for "
+ << *UI << "\n");
+ return false;
+ }
+ }
+ }
+
// The list of pointers that we can safely read and write to remains empty.
SmallPtrSet<Value *, 8> SafePointers;
diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
new file mode 100644
index 00000000000000..f7379df934bd77
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
+
+
+; #include <stdio.h>
+; #define SIZE 17
+;
+; unsigned char result;
+; unsigned char arr_1[SIZE];
+;
+; __attribute__((__noinline__))
+; void test(int limit, unsigned char val, int arr_2[SIZE][SIZE][SIZE]) {
+; #pragma clang loop vectorize_predicate(enable)
+; for (short i_5 = 0; i_5 < limit; i_5++) {
+; arr_1 [i_5] = val;
+; result = arr_2[0][0][i_5] != arr_2[i_5][i_5][0];
+; }
+; }
+;
+;int main(void) {
+; int arr_2[SIZE][SIZE][SIZE];
+;
+; __builtin_memset(arr_2, 1, sizeof(arr_2));
+;
+; test(SIZE, 0, arr_2);
+; printf("%hu \n", result);
+;}
+; clang miscompiles the above code:
+; with vectorize_predicate(enable) the result is 0, whereas it is 1 without it.
+
+
+@result = global i8 0, align 1
+@arr_17 = global [17 x i8] zeroinitializer, align 1
+@a = external global i8, align 1
+
+define void @test(i32 %limit, i8 zeroext %val, ptr readonly %arr_14) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: pred.store.if:
+; CHECK-NOT: pred.store.continue:
+;
+entry:
+ %cmp18 = icmp sgt i32 %limit, 0
+ br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge: ; preds = %for.body
+ %conv20.lcssa = phi i32 [ %conv20, %for.body ]
+ %arrayidx4 = getelementptr inbounds [17 x i32], ptr %arr_14, i32 0, i32 %conv20.lcssa
+ %0 = load i32, ptr %arrayidx4, align 4, !tbaa !4
+ %arrayidx8 = getelementptr inbounds [17 x [17 x i32]], ptr %arr_14, i32 %conv20.lcssa, i32 %conv20.lcssa
+ %1 = load i32, ptr %arrayidx8, align 4, !tbaa !4
+ %cmp10 = icmp ne i32 %0, %1
+ %conv11 = zext i1 %cmp10 to i8
+ store i8 %conv11, ptr @result, align 1, !tbaa !8
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %conv20 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+ %i_5.019 = phi i16 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds [17 x i8], ptr @arr_17, i32 0, i32 %conv20
+ store i8 %val, ptr %arrayidx, align 1, !tbaa !8
+ %inc = add i16 %i_5.019, 1
+ %conv = sext i16 %inc to i32
+ %cmp = icmp slt i32 %conv, %limit
+ br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge, !llvm.loop !9
+}
+
+
+
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C++ TBAA"}
+!8 = !{!6, !6, i64 0}
+!9 = distinct !{!9, !10, !11, !12, !13, !14}
+!10 = !{!"llvm.loop.mustprogress"}
+!11 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
+!12 = !{!"llvm.loop.vectorize.width", i32 2}
+!13 = !{!"llvm.loop.vectorize.scalable.enable", i1 false}
+!14 = !{!"llvm.loop.vectorize.enable", i1 true}
``````````
</details>
https://github.com/llvm/llvm-project/pull/81609
More information about the llvm-commits
mailing list