[llvm] faf3333 - [SCEV] Add coverage for flag inference with vscale strided IVs

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 31 09:49:35 PDT 2024


Author: Philip Reames
Date: 2024-07-31T09:48:44-07:00
New Revision: faf3333510e0c2c3f319af40456e10c471e11ce8

URL: https://github.com/llvm/llvm-project/commit/faf3333510e0c2c3f319af40456e10c471e11ce8
DIFF: https://github.com/llvm/llvm-project/commit/faf3333510e0c2c3f319af40456e10c471e11ce8.diff

LOG: [SCEV] Add coverage for flag inference with vscale strided IVs

Given vscale is a power of two, we should be able to prove no-self-wrap
in these cases.  We currently don't, but an upcoming change will fix this.

Added: 
    

Modified: 
    llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
index befcabd911467..943389d07eb8b 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
@@ -362,3 +362,165 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
+
+; The next two cases check whether we can infer the wrap flags on the IV
+; of a countup loop using vscale strides.
+; TODO: We should be able to, because vscale is a power of two and these
+; loops are finite by assumption (the functions are marked mustprogress).
+
+define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { ; count-up IV with stride vscale and a signed (slt) exit test
+; CHECK-LABEL: 'vscale_slt_noflags'
+; CHECK-NEXT:  Classifying expressions for: @vscale_slt_noflags
+; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
+; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+; CHECK-NEXT:    --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %add = add i32 %i.05, %vscale
+; CHECK-NEXT:    --> {vscale,+,vscale}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_noflags
+; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  %vscale = call i32 @llvm.vscale.i32()            ; loop stride; the vscale_range attribute bounds it to [2,1024]
+  %cmp4 = icmp sgt i32 %n, 0                        ; guard: the loop is entered only when %n > 0 (signed)
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; IV: 0 on entry, %add on the backedge
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 ; address of the i32 element of %A at index %i.05
+  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
+  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) ; add 1 to every lane
+  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
+  %add = add i32 %i.05, %vscale                     ; IV step; the add carries no nuw/nsw flags
+  %cmp = icmp slt i32 %add, %n                      ; signed exit test: continue while %add < %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { ; count-up IV with stride 4 * vscale and an unsigned (ult) exit test
+; CHECK-LABEL: 'vscalex4_ult_noflags'
+; CHECK-NEXT:  Classifying expressions for: @vscalex4_ult_noflags
+; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
+; CHECK-NEXT:    %VF = mul i32 %vscale, 4
+; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
+; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
+; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %add = add i32 %i.05, %VF
+; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_ult_noflags
+; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  %vscale = call i32 @llvm.vscale.i32()            ; the vscale_range attribute bounds this to [2,1024]
+  %VF = mul i32 %vscale, 4                          ; loop stride: 4 * vscale (the mul itself carries no flags)
+  %cmp4 = icmp sgt i32 %n, 0                        ; guard: the loop is entered only when %n > 0 (signed)
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] ; IV: 0 on entry, %add on the backedge
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 ; address of the i32 element of %A at index %i.05
+  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
+  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) ; add 1 to every lane
+  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
+  %add = add i32 %i.05, %VF                         ; IV step; the add carries no nuw/nsw flags
+  %cmp = icmp ult i32 %add, %n                      ; unsigned exit test: continue while %add < %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; The next two cases check whether we can infer the wrap flags on the IV
+; of a countdown loop using vscale strides.
+; TODO: We should be able to, because vscale is a power of two and these
+; loops are finite by assumption (the functions are marked mustprogress).
+
+define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { ; count-down IV with stride vscale and an inequality (ne 0) exit test
+; CHECK-LABEL: 'vscale_countdown_ne'
+; CHECK-NEXT:  Classifying expressions for: @vscale_countdown_ne
+; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
+; CHECK-NEXT:    %start = sub i32 %n, %vscale
+; CHECK-NEXT:    --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set
+; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
+; CHECK-NEXT:    --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
+; CHECK-NEXT:    --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %sub = sub i32 %iv, %vscale
+; CHECK-NEXT:    --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @vscale_countdown_ne
+; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  %vscale = call i32 @llvm.vscale.i32()            ; loop stride; the vscale_range attribute bounds it to [2,1024]
+  %cmp4 = icmp sgt i32 %n, 0                        ; guard: the loop is entered only when %n > 0 (signed)
+  %start = sub i32 %n, %vscale                      ; initial IV value: %n - vscale (no wrap flags)
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] ; IV: %start on entry, %sub on the backedge
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv ; address of the i32 element of %A at index %iv
+  %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
+  %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1) ; add 1 to every lane
+  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
+  %sub = sub i32 %iv, %vscale                       ; IV step downward; the sub carries no nuw/nsw flags
+  %cmp = icmp ne i32 %sub, 0                        ; exit only when %sub is exactly 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { ; count-down IV with stride 4 * vscale and an inequality (ne 0) exit test
+; CHECK-LABEL: 'vscalex4_countdown_ne'
+; CHECK-NEXT:  Classifying expressions for: @vscalex4_countdown_ne
+; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
+; CHECK-NEXT:    %VF = shl i32 %vscale, 2
+; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
+; CHECK-NEXT:    %start = sub i32 %n, %VF
+; CHECK-NEXT:    --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set
+; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
+; CHECK-NEXT:    --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
+; CHECK-NEXT:    --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    %sub = sub i32 %iv, %VF
+; CHECK-NEXT:    --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_countdown_ne
+; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  %vscale = call i32 @llvm.vscale.i32()            ; the vscale_range attribute bounds this to [2,1024]
+  %VF = shl i32 %vscale, 2                          ; loop stride: vscale << 2, i.e. 4 * vscale
+  %cmp4 = icmp sgt i32 %n, 0                        ; guard: the loop is entered only when %n > 0 (signed)
+  %start = sub i32 %n, %VF                          ; initial IV value: %n - %VF (no wrap flags)
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] ; IV: %start on entry, %sub on the backedge
+  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv ; address of the i32 element of %A at index %iv
+  %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
+  %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1) ; add 1 to every lane
+  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
+  %sub = sub i32 %iv, %VF                           ; IV step downward; the sub carries no nuw/nsw flags
+  %cmp = icmp ne i32 %sub, 0                        ; exit only when %sub is exactly 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}


        


More information about the llvm-commits mailing list