[llvm] [LV] Disable fold tail by masking - when induction vars used outside (PR #81609)
    Florian Hahn via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Tue Feb 13 09:59:04 PST 2024
    
    
  
================
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
+
+
+; #include <stdio.h>
+; #define SIZE 17
+;
+; unsigned char result;
+; unsigned char arr_1[SIZE];
+;
+; __attribute__((__noinline__))
+; void test(int limit, unsigned char val, int arr_2[SIZE][SIZE][SIZE]) {
+;     #pragma clang loop vectorize_predicate(enable)
+;     for (short i_5 = 0; i_5 < limit; i_5++) {
+;         arr_1 [i_5] = val;
+;         result = arr_2[0][0][i_5] != arr_2[i_5][i_5][0];
+;     }
+; }
+;
+;int main(void) {
+;  int arr_2[SIZE][SIZE][SIZE];
+;
+;  __builtin_memset(arr_2, 1, sizeof(arr_2));
+;
+;  test(SIZE, 0, arr_2);
+;  printf("%hu \n", result);
+;}
+; clang miscompiles the above code:
+; with vectorize_predicate(enable) the result is 0; without it the result is 1.
+
+
+@result = global i8 0, align 1
+@arr_17 = global [17 x i8] zeroinitializer, align 1
+@a = external global i8, align 1
+
+define void @test(i32 %limit, i8 zeroext %val, ptr readonly %arr_14)   {
+; CHECK-LABEL: @test(
+; CHECK-NOT:       pred.store.if:
+; CHECK-NOT:       pred.store.continue:
+;
+entry:
+  %cmp18 = icmp sgt i32 %limit, 0
+  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:              ; preds = %for.body
+  %conv20.lcssa = phi i32 [ %conv20, %for.body ]
+  %arrayidx4 = getelementptr inbounds [17 x i32], ptr %arr_14, i32 0, i32 %conv20.lcssa
+  %0 = load i32, ptr %arrayidx4, align 4, !tbaa !4
+  %arrayidx8 = getelementptr inbounds [17 x [17 x i32]], ptr %arr_14, i32 %conv20.lcssa, i32 %conv20.lcssa
+  %1 = load i32, ptr %arrayidx8, align 4, !tbaa !4
+  %cmp10 = icmp ne i32 %0, %1
+  %conv11 = zext i1 %cmp10 to i8
+  store i8 %conv11, ptr @result, align 1, !tbaa !8
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %conv20 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+  %i_5.019 = phi i16 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds [17 x i8], ptr @arr_17, i32 0, i32 %conv20
+  store i8 %val, ptr %arrayidx, align 1, !tbaa !8
+  %inc = add i16 %i_5.019, 1
+  %conv = sext i16 %inc to i32
+  %cmp = icmp slt i32 %conv, %limit
+  br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge, !llvm.loop !9
+}
+
+
+
+!4 = !{!5, !5, i64 0}
----------------
fhahn wrote:
The metadata nodes used only by `!tbaa` shouldn't be needed once the `!tbaa` attachments are dropped.
https://github.com/llvm/llvm-project/pull/81609
    
    
More information about the llvm-commits
mailing list