[llvm] [SandboxVec][Scheduler] Fix top of schedule (PR #126820)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 11 14:55:38 PST 2025


https://github.com/vporpo created https://github.com/llvm/llvm-project/pull/126820

This patch fixes how the top-of-schedule variable is set and updated. Before this patch, it was updated every time we scheduled a bundle, which is wrong, because the top-of-schedule needs to be maintained across scheduling attempts.

It should get reset only when we clear the schedule or when we destroy the current schedule and re-schedule.

From 62f4285562d98cc987e494defb2e62c3ebed891d Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Thu, 23 Jan 2025 13:36:22 -0800
Subject: [PATCH] [SandboxVec][Scheduler] Fix top of schedule

This patch fixes the way the top-of-schedule variable gets set and updated.
Before this patch it used to get updated whenever we scheduled a bundle,
which is wrong, as the top-of-schedule needs to be maintained across
scheduling attempts.

It should get reset only when we clear the schedule or when we destroy the
current schedule and re-schedule.
---
 .../Vectorize/SandboxVectorizer/Scheduler.cpp | 12 +++++++--
 .../SandboxVectorizer/bottomup_basic.ll       |  4 +--
 .../bottomup_seed_slice_pow2.ll               |  2 +-
 .../SandboxVectorizer/repeated_instrs.ll      |  4 +--
 .../Transforms/SandboxVectorizer/scheduler.ll | 25 +++++++++++++++++++
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
index dd24cc3d98cf8..2f7d7087ca880 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp
@@ -230,11 +230,13 @@ bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
     // top-most part of the schedule that includes the instrs in the bundle and
     // re-schedule.
     trimSchedule(Instrs);
+    ScheduleTopItOpt = std::nullopt;
     [[fallthrough]];
   case BndlSchedState::NoneScheduled: {
     // TODO: Set the window of the DAG that we are interested in.
-    // We start scheduling at the bottom instr of Instrs.
-    ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator());
+    if (!ScheduleTopItOpt)
+      // We start scheduling at the bottom instr of Instrs.
+      ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator());
 
     // TODO: For now don't cross BBs.
     if (!DAG.getInterval().empty()) {
@@ -262,6 +264,12 @@ bool Scheduler::trySchedule(ArrayRef<Instruction *> Instrs) {
 void Scheduler::dump(raw_ostream &OS) const {
   OS << "ReadyList:\n";
   ReadyList.dump(OS);
+  OS << "Top of schedule: ";
+  if (ScheduleTopItOpt)
+    OS << **ScheduleTopItOpt;
+  else
+    OS << "Empty";
+  OS << "\n";
 }
 void Scheduler::dump() const { dump(dbgs()); }
 #endif // NDEBUG
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
index ee8592c04b62c..45b937dc1b1b6 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_basic.ll
@@ -77,7 +77,7 @@ define void @store_fadd_load(ptr %ptr) {
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
 ; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
 ; CHECK-NEXT:    [[VECL1:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
-; CHECK-NEXT:    [[VEC:%.*]] = fadd <2 x float> [[VECL]], [[VECL1]]
+; CHECK-NEXT:    [[VEC:%.*]] = fadd <2 x float> [[VECL1]], [[VECL]]
 ; CHECK-NEXT:    store <2 x float> [[VEC]], ptr [[PTR0]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -247,8 +247,8 @@ define void @diamondMultiInput(ptr %ptr, ptr %ptrX) {
 ; CHECK-LABEL: define void @diamondMultiInput(
 ; CHECK-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]]) {
 ; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
-; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
 ; CHECK-NEXT:    [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
 ; CHECK-NEXT:    [[VINS:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0
 ; CHECK-NEXT:    [[VEXT:%.*]] = extractelement <2 x float> [[VECL]], i32 0
 ; CHECK-NEXT:    [[VINS1:%.*]] = insertelement <2 x float> [[VINS]], float [[VEXT]], i32 1
diff --git a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
index f1c6e3297d79c..1b189831569f5 100644
--- a/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/bottomup_seed_slice_pow2.ll
@@ -7,8 +7,8 @@ define void @pow2(ptr %ptr, float %val) {
 ; POW2-SAME: ptr [[PTR:%.*]], float [[VAL:%.*]]) {
 ; POW2-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
 ; POW2-NEXT:    [[PTR2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
-; POW2-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
 ; POW2-NEXT:    [[LD2:%.*]] = load float, ptr [[PTR2]], align 4
+; POW2-NEXT:    [[VECL:%.*]] = load <2 x float>, ptr [[PTR0]], align 4
 ; POW2-NEXT:    store <2 x float> [[VECL]], ptr [[PTR0]], align 4
 ; POW2-NEXT:    store float [[LD2]], ptr [[PTR2]], align 4
 ; POW2-NEXT:    ret void
diff --git a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
index 25d9d79154d35..add762ac2d894 100644
--- a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
@@ -5,10 +5,10 @@ define i32 @repeated_splat(ptr %ptr, i32 %v) #0 {
 ; CHECK-LABEL: define i32 @repeated_splat(
 ; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
 ; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
-; CHECK-NEXT:    [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4
 ; CHECK-NEXT:    [[SPLAT:%.*]] = add i32 [[V]], 0
 ; CHECK-NEXT:    [[PACK:%.*]] = insertelement <2 x i32> poison, i32 [[SPLAT]], i32 0
 ; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <2 x i32> [[PACK]], i32 [[SPLAT]], i32 1
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4
 ; CHECK-NEXT:    [[VEC:%.*]] = mul <2 x i32> [[VECL]], [[PACK1]]
 ; CHECK-NEXT:    store <2 x i32> [[VEC]], ptr [[GEP0]], align 4
 ; CHECK-NEXT:    ret i32 0
@@ -31,6 +31,7 @@ define i32 @repeated_partial(ptr %ptr, i32 %v) #0 {
 ; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 1
 ; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[SPLAT:%.*]] = add i32 [[V]], 0
 ; CHECK-NEXT:    [[LD0:%.*]] = load i32, ptr [[GEP0]], align 4
 ; CHECK-NEXT:    [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[LD3:%.*]] = load i32, ptr [[GEP3]], align 4
@@ -39,7 +40,6 @@ define i32 @repeated_partial(ptr %ptr, i32 %v) #0 {
 ; CHECK-NEXT:    [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[LD1]], i32 2
 ; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[LD3]], i32 3
 ; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
-; CHECK-NEXT:    [[SPLAT:%.*]] = add i32 [[V]], 0
 ; CHECK-NEXT:    [[VEC:%.*]] = mul <4 x i32> [[VECL]], [[PACK3]]
 ; CHECK-NEXT:    store <4 x i32> [[VEC]], ptr [[GEP0]], align 4
 ; CHECK-NEXT:    ret i32 0
diff --git a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
index 92a78a979192b..acbec80db6b06 100644
--- a/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/scheduler.ll
@@ -49,3 +49,28 @@ define void @check_dag_scheduler_update(ptr noalias %p, ptr noalias %p1) {
   store i32 %add21, ptr %arrayidx23
   ret void
 }
+
+; This used to generate use-before-def because of a buggy update of the
+; top-of-schedule variable.
+define <4 x float> @check_top_of_schedule(ptr %0) {
+; CHECK-LABEL: define <4 x float> @check_top_of_schedule(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
+; CHECK-NEXT:    [[TRUNC_1:%.*]] = fptrunc double 0.000000e+00 to float
+; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float [[TRUNC_1]], i64 0
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0]], i64 1
+; CHECK-NEXT:    store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
+; CHECK-NEXT:    ret <4 x float> [[INS_2]]
+;
+  %trunc.1 = fptrunc double 0.000000e+00 to float
+  %trunc.2 = fptrunc double 1.000000e+00 to float
+  %ins.1 = insertelement <4 x float> zeroinitializer, float poison, i64 0
+  %ins.2 = insertelement <4 x float> %ins.1, float %trunc.1, i64 0
+  %ext.1 = fpext float %trunc.1 to double
+  %gep.1  = getelementptr double, ptr %0, i64 1
+  store double %ext.1, ptr %gep.1, align 8
+  %ext.2 = fpext float %trunc.2 to double
+  %gep.2 = getelementptr double, ptr %0, i64 2
+  store double %ext.2, ptr %gep.2, align 8
+  ret <4 x float> %ins.2
+}



More information about the llvm-commits mailing list