[PATCH] D144066: [Pseudo probe] Duplicate probes in vectorized loop body.

Wed Feb 15 10:18:23 PST 2023

This revision was automatically updated to reflect the committed changes.
Closed by commit rGeddec9de44cd: [Pseudo probe] Duplicate probes in vectorized loop body. (authored by hoy).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144066/new/

https://reviews.llvm.org/D144066

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/SampleProfile/pseudo-probe-loop-vectorize.ll


Index: llvm/test/Transforms/SampleProfile/pseudo-probe-loop-vectorize.ll
===================================================================

--- /dev/null
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-loop-vectorize.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(ptr nocapture %a, ptr nocapture readonly %b) #0 {
+entry:
+  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 1, i32 0, i64 -1)
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !tbaa !1
+  %conv = fptosi float %0 to i32
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx2, align 4, !tbaa !5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 2, i32 0, i64 -1)
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 3, i32 0, i64 -1)
+  ret i32 0
+}
+
+
+; CHECK-LABEL:  @test1
+; CHECK:        vector.body:
+; CHECK:          load <4 x float>, ptr %{{.*}}
+; CHECK:          store <4 x i32> %{{.*}}, ptr %{{.*}}
+; CHECK-COUNT-4:  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 2, i32 0, i64 -1)
+; CHECK:          %index.next = add nuw i64 %index, 4
+
+
+
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.pseudo_probe_desc = !{!0}
+
+!0 = !{i64 3666282617048535130, i64 52824598631, !"test1"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1285,6 +1285,12 @@
 
   /// Returns true if \p I is known to be uniform after vectorization.
   bool isUniformAfterVectorization(Instruction *I, ElementCount VF) const {
+    // Pseudo probe needs to be duplicated for each unrolled iteration and
+    // vector lane so that profiled loop trip count can be accurately
+    // accumulated instead of being under counted.
+    if (isa<PseudoProbeInst>(I))
+      return false;
+
     if (VF.isScalar())
       return true;
 
@@ -8939,7 +8945,7 @@
 
     // Introduce each ingredient into VPlan.
     // TODO: Model and preserve debug intrinsics in VPlan.
-    for (Instruction &I : BB->instructionsWithoutDebug()) {
+    for (Instruction &I : BB->instructionsWithoutDebug(false)) {
       Instruction *Instr = &I;
 
       // First filter out irrelevant instructions, to ensure no recipes are


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144066.497731.patch
Type: text/x-patch
Size: 3412 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230215/19011fc4/attachment.bin>