[llvm] dee810e - [NFC][LAA] Precommit tests for forked pointers

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 24 08:21:23 PST 2021


Author: Graham Hunter
Date: 2021-11-24T16:20:35Z
New Revision: dee810e117ad442c426a1213277faa8b028751d5

URL: https://github.com/llvm/llvm-project/commit/dee810e117ad442c426a1213277faa8b028751d5
DIFF: https://github.com/llvm/llvm-project/commit/dee810e117ad442c426a1213277faa8b028751d5.diff

LOG: [NFC][LAA] Precommit tests for forked pointers

Precommit for https://reviews.llvm.org/D108699
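
For context, a "forked pointer" here is a memory access whose address is chosen
between two possible pointers (or offsets) by a per-iteration condition, so the
access has no single base for LoopAccessAnalysis to reason about. A minimal C
sketch of the pattern, taken from the comments in the new LoopAccessAnalysis
test file (function and parameter names as in that test):

    void forked_ptrs_different_base_same_offset(float *A, float *B,
                                                float *C, int *D) {
      for (int i = 0; i < 100; i++) {
        /* The base pointer of the load forks between A and B
           depending on the per-iteration value of D[i]. */
        if (D[i] != 0) {
          C[i] = A[i];
        } else {
          C[i] = B[i];
        }
      }
    }

At the moment LAA reports "cannot identify array bounds" for these loops, as the
CHECK lines below record; the linked review is expected to teach the analysis to
reason about both sides of such a fork.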

Added: 
    llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
    llvm/test/Transforms/LoopVectorize/forked-pointers.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
new file mode 100644
index 0000000000000..f9f0e24bb9391
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
@@ -0,0 +1,183 @@
+; RUN: opt -loop-accesses -analyze -enable-new-pm=0 %s 2>&1 | FileCheck %s
+; RUN: opt -disable-output -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset':
+; CHECK-NEXT:  for.body:
+; CHECK-NEXT:    Report: cannot identify array bounds
+; CHECK-NEXT:    Dependences:
+; CHECK-NEXT:    Run-time memory checks:
+; CHECK-NEXT:    Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:    SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:    Expressions re-written:
+
+;;;; Derived from the following C code
+;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) {
+;;   for (int i=0; i<100; i++) {
+;;     if (D[i] != 0) {
+;;       C[i] = A[i];
+;;     } else {
+;;       C[i] = B[i];
+;;     }
+;;   }
+;; }
+
+define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %spec.select = select i1 %cmp1.not, float* %Base2, float* %Base1
+  %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv
+  %.sink = load float, float* %.sink.in, align 4
+  %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+  store float %.sink, float* %1, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: function 'forked_ptrs_same_base_different_offset':
+; CHECK-NEXT:   for.body:
+; CHECK-NEXT:     Report: cannot identify array bounds
+; CHECK-NEXT:     Dependences:
+; CHECK-NEXT:     Run-time memory checks:
+; CHECK-NEXT:     Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:     SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:     Expressions re-written:
+
+;;;; Derived from the following C code
+;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) {
+;;   int offset;
+;;   for (int i = 0; i < 100; i++) {
+;;     if (C[i] != 0)
+;;       offset = i;
+;;     else
+;;       offset = i+1;
+;;     B[i] = A[offset];
+;;   }
+;; }
+
+define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %i.014 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %add = add nuw nsw i32 %i.014, 1
+  %1 = trunc i64 %indvars.iv to i32
+  %offset.0 = select i1 %cmp1.not, i32 %add, i32 %1
+  %idxprom213 = zext i32 %offset.0 to i64
+  %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
+  %2 = load float, float* %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+  store float %2, float* %arrayidx5, align 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+;;;; Cases that can be handled by a forked pointer but are not currently allowed.
+
+; CHECK-LABEL: function 'forked_ptrs_uniform_and_strided_forks':
+; CHECK-NEXT:  for.body:
+; CHECK-NEXT:    Report: cannot identify array bounds
+; CHECK-NEXT:    Dependences:
+; CHECK-NEXT:    Run-time memory checks:
+; CHECK-NEXT:    Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:    Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:    SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:    Expressions re-written:
+
+;;;; Derived from forked_ptrs_same_base_different_offset with a manually
+;;;; added uniform offset and a mul to provide a stride
+
+define dso_local void @forked_ptrs_uniform_and_strided_forks(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %i.014 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %add = add nuw nsw i32 %i.014, 1
+  %1 = trunc i64 %indvars.iv to i32
+  %mul = mul i32 %1, 3
+  %offset.0 = select i1 %cmp1.not, i32 4, i32 %mul
+  %idxprom213 = sext i32 %offset.0 to i64
+  %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
+  %2 = load float, float* %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+  store float %2, float* %arrayidx5, align 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL:  function 'forked_ptrs_gather_and_contiguous_forks':
+; CHECK-NEXT:   for.body:
+; CHECK-NEXT:     Report: cannot identify array bounds
+; CHECK-NEXT:     Dependences:
+; CHECK-NEXT:     Run-time memory checks:
+; CHECK-NEXT:     Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:     SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:     Expressions re-written:
+
+;;;; Derived from forked_ptrs_same_base_different_offset with a gather
+;;;; added using Preds as an index array in addition to the per-iteration
+;;;; condition.
+
+define dso_local void @forked_ptrs_gather_and_contiguous_forks(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %arrayidx9 = getelementptr inbounds float, float* %Base2, i64 %indvars.iv
+  %idxprom4 = sext i32 %0 to i64
+  %arrayidx5 = getelementptr inbounds float, float* %Base1, i64 %idxprom4
+  %.sink.in = select i1 %cmp1.not, float* %arrayidx9, float* %arrayidx5
+  %.sink = load float, float* %.sink.in, align 4
+  %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+  store float %.sink, float* %1, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}

diff --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
new file mode 100644
index 0000000000000..4ddfc220a3094
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-vectorize -instcombine -force-vector-width=4 -S < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+;;;; Derived from the following C code
+;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) {
+;;   for (int i=0; i<100; i++) {
+;;     if (D[i] != 0) {
+;;       C[i] = A[i];
+;;     } else {
+;;       C[i] = B[i];
+;;     }
+;;   }
+;; }
+
+define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+; CHECK-LABEL: @forked_ptrs_different_base_same_offset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2:%.*]], float* [[BASE1:%.*]]
+; CHECK-NEXT:    [[DOTSINK_IN:%.*]] = getelementptr inbounds float, float* [[SPEC_SELECT]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[DOTSINK:%.*]] = load float, float* [[DOTSINK_IN]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[DOTSINK]], float* [[TMP1]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %spec.select = select i1 %cmp1.not, float* %Base2, float* %Base1
+  %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv
+  %.sink = load float, float* %.sink.in, align 4
+  %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+  store float %.sink, float* %1, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+;;;; Derived from the following C code
+;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) {
+;;   int offset;
+;;   for (int i = 0; i < 100; i++) {
+;;     if (C[i] != 0)
+;;       offset = i;
+;;     else
+;;       offset = i+1;
+;;     B[i] = A[offset];
+;;   }
+;; }
+
+define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
+; CHECK-LABEL: @forked_ptrs_same_base_different_offset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_014]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP1]]
+; CHECK-NEXT:    [[IDXPROM213:%.*]] = zext i32 [[OFFSET_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[BASE:%.*]], i64 [[IDXPROM213]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %i.014 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %add = add nuw nsw i32 %i.014, 1
+  %1 = trunc i64 %indvars.iv to i32
+  %offset.0 = select i1 %cmp1.not, i32 %add, i32 %1
+  %idxprom213 = zext i32 %offset.0 to i64
+  %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
+  %2 = load float, float* %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+  store float %2, float* %arrayidx5, align 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}


        

