[llvm-branch-commits] [llvm] release/20.x: [SLP] Check for PHI nodes (potentially cycles!) when checking dependencies: Backport Attempt 3 (PR #128371)
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 24 20:59:37 PST 2025
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/128371
>From 098492a228f781a37997637e0953fd4e7faa2193 Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Thu, 13 Feb 2025 14:19:51 -0800
Subject: [PATCH] [SLP] Check for PHI nodes (potentially cycles!) when checking
dependencies
When checking for dependencies for gather nodes with users with the same
last instruction, we cannot rely on the index order if there is an (even
potential!) cycle in the graph, because the order may then not work
correctly and cause a compiler crash.
Fixes #127128
(cherry picked from commit ac217ee389d63124432e5e6890851a678f7a676b)
---
.../Transforms/Vectorize/SLPVectorizer.cpp | 10 +++-
.../X86/delayed-gather-emission.ll | 2 +-
.../X86/matching-gather-nodes-phi-users.ll | 2 +-
.../X86/perfect-matched-reused-bv.ll | 13 ++--
.../SLPVectorizer/X86/phi-node-with-cycle.ll | 59 +++++++++++++++++++
5 files changed, 77 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 19963e780ebd3..7b20eda550095 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13181,8 +13181,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
continue;
// If the user instruction is used for some reason in different
// vectorized nodes - make it depend on index.
+ // If any vector node is PHI node, this dependency might not work
+ // because of cycle dependencies, so disable it.
if (TEUseEI.UserTE != UseEI.UserTE &&
- TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
+ (TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
+ any_of(
+ VectorizableTree,
+ [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::Vectorize &&
+ TE->getOpcode() == Instruction::PHI;
+ })))
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
index 5562291dbb6be..bf3f0c4df74e4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll
@@ -31,7 +31,7 @@ define void @test() {
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
-; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index 166c819098c8c..d649465c9ff12 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '-6'
+; YAML: - Cost: '-3'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '14'
; YAML: ...
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll
index 1053e0fc10669..c4a49242a5583 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/perfect-matched-reused-bv.ll
@@ -7,16 +7,17 @@ define void @test() {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB4:.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[ADD6:%.*]], %[[BB4:.*]] ]
+; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ 0, %[[BB4]] ]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[PHI2]], 0
+; CHECK-NEXT: [[OR3:%.*]] = or i32 [[PHI]], 0
; CHECK-NEXT: br i1 false, label %[[BB7:.*]], label %[[BB4]]
; CHECK: [[BB4]]:
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
-; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
-; CHECK-NEXT: [[TMP4]] = add <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[ADD6]] = add i32 [[PHI]], 0
; CHECK-NEXT: br i1 false, label %[[BB7]], label %[[BB1]]
; CHECK: [[BB7]]:
-; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP1]], %[[BB1]] ], [ [[TMP3]], %[[BB4]] ]
+; CHECK-NEXT: [[PHI8:%.*]] = phi i32 [ [[OR]], %[[BB1]] ], [ 0, %[[BB4]] ]
+; CHECK-NEXT: [[PHI9:%.*]] = phi i32 [ [[OR3]], %[[BB1]] ], [ [[ADD6]], %[[BB4]] ]
; CHECK-NEXT: ret void
;
bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
new file mode 100644
index 0000000000000..22e7e6a8e6624
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-with-cycle.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
+
+define void @test(float %0) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
+; CHECK-NEXT: br label %[[BB6:.*]]
+; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]]
+; CHECK-NEXT: br label %[[BB10:.*]]
+; CHECK: [[BB9:.*]]:
+; CHECK-NEXT: br label %[[BB10]]
+; CHECK: [[BB10]]:
+; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ]
+; CHECK-NEXT: br label %[[BB12:.*]]
+; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]])
+; CHECK-NEXT: ret void
+;
+ %2 = fdiv float 0.000000e+00, 0.000000e+00
+ %3 = fdiv float 0.000000e+00, 0.000000e+00
+ %4 = fdiv float %0, 0.000000e+00
+ br label %5
+
+5:
+ %6 = fmul float %4, 0.000000e+00
+ %7 = fsub float 0.000000e+00, %6
+ %8 = fmul float %3, 0.000000e+00
+ %9 = fsub float 0.000000e+00, %8
+ br label %11
+
+10:
+ br label %11
+
+11:
+ %12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ]
+ %13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ]
+ br label %14
+
+14:
+ %15 = fmul float %2, 0.000000e+00
+ %16 = fsub float %12, %15
+ %17 = fmul float %4, 0.000000e+00
+ %18 = fsub float %13, %17
+ %19 = fadd float %16, %18
+ %20 = call float @llvm.fabs.f32(float %19)
+ ret void
+}
+
More information about the llvm-branch-commits
mailing list