[PATCH] D144066: [Pseudo probe] Duplicate probes in vectorized loop body.

Hongtao Yu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 14 18:21:53 PST 2023


hoy created this revision.
Herald added subscribers: modimo, wenlei, hiraditya.
Herald added a project: All.
hoy requested review of this revision.
Herald added subscribers: llvm-commits, pcwang-thead.
Herald added a project: LLVM.

Prevoius pseudo probes were dropped out of a vectorized loop body during loop vectorization. This can result in the samples of the loop entry is used for the loop body, which in turn can cause undercounting of the loop iteration count. The undercounting can further prevent the loop from being vectorized in the next build. I'm fixing this by explicting allowing pseudo probes to be kept in the vectorized loop body, and by claiming a probe instruction is not "uniform", the vectorizer will duplicate it by the number of vector lanes.

For one internal service, I'm seeing the change causes the size increase of the .pseudoprobe section by 0.7%, which should count around 0.2% of the whole binary size.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D144066

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/SampleProfile/pseudo-probe-loop-vectorize.ll


Index: llvm/test/Transforms/SampleProfile/pseudo-probe-loop-vectorize.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SampleProfile/pseudo-probe-loop-vectorize.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(ptr nocapture %a, ptr nocapture readonly %b) #0 {
+entry:
+  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 1, i32 0, i64 -1)
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %indvars.iv
+  %0 = load float, ptr %arrayidx, align 4, !tbaa !1
+  %conv = fptosi float %0 to i32
+  %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %conv, ptr %arrayidx2, align 4, !tbaa !5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1600
+  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 2, i32 0, i64 -1)
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 3, i32 0, i64 -1)
+  ret i32 0
+}
+
+
+; CHECK-LABEL:  @test1
+; CHECK:        vector.body:
+; CHECK:          load <4 x float>, ptr %{{.*}}
+; CHECK:          store <4 x i32> %{{.*}}, ptr %{{.*}}
+; CHECK-COUNT-4:  call void @llvm.pseudoprobe(i64 3666282617048535130, i64 2, i32 0, i64 -1)
+; CHECK:          %index.next = add nuw i64 %index, 4
+
+
+
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.pseudo_probe_desc = !{!0}
+
+!0 = !{i64 3666282617048535130, i64 52824598631, !"test1"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1285,6 +1285,10 @@
 
   /// Returns true if \p I is known to be uniform after vectorization.
   bool isUniformAfterVectorization(Instruction *I, ElementCount VF) const {
+    // Pseudo probe needs to be run for each lane.
+    if (isa<PseudoProbeInst>(I))
+      return false;
+
     if (VF.isScalar())
       return true;
 
@@ -8939,7 +8943,7 @@
 
     // Introduce each ingredient into VPlan.
     // TODO: Model and preserve debug intrinsics in VPlan.
-    for (Instruction &I : BB->instructionsWithoutDebug()) {
+    for (Instruction &I : BB->instructionsWithoutDebug(false)) {
       Instruction *Instr = &I;
 
       // First filter out irrelevant instructions, to ensure no recipes are


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144066.497517.patch
Type: text/x-patch
Size: 3265 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230215/4afe63fd/attachment.bin>


More information about the llvm-commits mailing list