[llvm] [LV] Disable fold tail by masking - when induction vars used outside (PR #81609)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 13 09:59:04 PST 2024
================
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
+
+
+; #include <stdio.h>
+; #define SIZE 17
+;
+; unsigned char result;
+; unsigned char arr_1[SIZE];
+;
+; __attribute__((__noinline__))
+; void test(int limit, unsigned char val, int arr_2[SIZE][SIZE][SIZE]) {
+; #pragma clang loop vectorize_predicate(enable)
+; for (short i_5 = 0; i_5 < limit; i_5++) {
+; arr_1 [i_5] = val;
+; result = arr_2[0][0][i_5] != arr_2[i_5][i_5][0];
+; }
+; }
+;
+;int main(void) {
+; int arr_2[SIZE][SIZE][SIZE];
+;
+; __builtin_memset(arr_2, 1, sizeof(arr_2));
+;
+; test(SIZE, 0, arr_2);
+; printf("%hu \n", result);
+;}
+; clang miscompiles the above code:
+; with vectorize_predicate(enable), result is 0; without it, result is 1.
+
+
+@result = global i8 0, align 1
+@arr_17 = global [17 x i8] zeroinitializer, align 1
+@a = external global i8, align 1
+
+define void @test(i32 %limit, i8 zeroext %val, ptr readonly %arr_14) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: pred.store.if:
+; CHECK-NOT: pred.store.continue:
+;
+entry:
+ %cmp18 = icmp sgt i32 %limit, 0
+ br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge: ; preds = %for.body
+ %conv20.lcssa = phi i32 [ %conv20, %for.body ]
+ %arrayidx4 = getelementptr inbounds [17 x i32], ptr %arr_14, i32 0, i32 %conv20.lcssa
+ %0 = load i32, ptr %arrayidx4, align 4, !tbaa !4
+ %arrayidx8 = getelementptr inbounds [17 x [17 x i32]], ptr %arr_14, i32 %conv20.lcssa, i32 %conv20.lcssa
+ %1 = load i32, ptr %arrayidx8, align 4, !tbaa !4
+ %cmp10 = icmp ne i32 %0, %1
+ %conv11 = zext i1 %cmp10 to i8
+ store i8 %conv11, ptr @result, align 1, !tbaa !8
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %conv20 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+ %i_5.019 = phi i16 [ %inc, %for.body ], [ 0, %for.body.preheader ]
----------------
fhahn wrote:
Can the phi be changed to i32, so the `sext` in the loop isn't needed?
https://github.com/llvm/llvm-project/pull/81609
More information about the llvm-commits
mailing list