[llvm] cf6a452 - [SLP]Fix same/alternate analysis in split node analysis for compares
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 16:33:43 PDT 2025
Author: Alexey Bataev
Date: 2025-03-31T19:33:40-04:00
New Revision: cf6a452cc734e00a1207514bb1fc2b123e31bb5f
URL: https://github.com/llvm/llvm-project/commit/cf6a452cc734e00a1207514bb1fc2b123e31bb5f
DIFF: https://github.com/llvm/llvm-project/commit/cf6a452cc734e00a1207514bb1fc2b123e31bb5f.diff
LOG: [SLP]Fix same/alternate analysis in split node analysis for compares
getSameOpcode in some cases may consider 2 compares as having same
opcode, even though previously they were considered as alternate. It may
happen, because getSameOpcode looses info about previous instructions
and their states. Need to use isAlternateInstruction function instead
for the correct analysis.
Reviewers: RKSimon, hiraditya
Reviewed By: RKSimon
Pull Request: https://github.com/llvm/llvm-project/pull/133769
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0679eac176584..18c896767b6d2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9013,8 +9013,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Op1Indices.set(Idx);
continue;
}
- InstructionsState NewS = getSameOpcode({LocalState.getMainOp(), I}, *TLI);
- if (NewS && !NewS.isAltShuffle()) {
+ if ((LocalState.getAltOpcode() != LocalState.getOpcode() &&
+ I->getOpcode() == LocalState.getOpcode()) ||
+ (LocalState.getAltOpcode() == LocalState.getOpcode() &&
+ !isAlternateInstruction(I, LocalState.getMainOp(),
+ LocalState.getAltOp(), *TLI))) {
Op1.push_back(V);
Op1Indices.set(Idx);
continue;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
index 035f3f145bf8d..ee51e467c5b8e 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
@@ -86,10 +86,9 @@ return:
define float @test_merge_anyof_v4sf(<4 x float> %t) {
; CHECK-LABEL: @test_merge_anyof_v4sf(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[T:%.*]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <8 x float> [[TMP0]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
-; CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x float> [[TMP0]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T:%.*]], zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[T]], splat (float 1.000000e+00)
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
@@ -401,10 +400,9 @@ return:
define float @test_merge_anyof_v4si(<4 x i32> %t) {
; CHECK-LABEL: @test_merge_anyof_v4si(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[T:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[TMP0]], <i32 255, i32 255, i32 255, i32 255, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP0]], <i32 255, i32 255, i32 255, i32 255, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T:%.*]], splat (i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[T]], splat (i32 255)
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index e0b3ff714162f..81da11dc42e88 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -207,10 +207,10 @@ define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP3]], <4 x i1> [[TMP1]], i64 4)
; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
; CHECK-NEXT: ret i1 [[TMP6]]
@@ -239,12 +239,12 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
; CHECK-NEXT: call void @use1(i1 [[TMP5]])
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP8]], <4 x i1> [[TMP1]], i64 4)
; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
; CHECK-NEXT: ret i1 [[TMP7]]
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll
index 25f161e9f1276..250c60a61fea1 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -slp-revec -mtriple=x86_64 -S | FileCheck %s %}
-; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -slp-revec -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s %}
+; RUN: %if x86-registered-target %{ opt < %s -passes=slp-vectorizer -slp-revec -mtriple=x86_64 -S | FileCheck %s --check-prefixes=CHECK,X86 %}
+; RUN: %if aarch64-registered-target %{ opt < %s -passes=slp-vectorizer -slp-revec -mtriple=aarch64-unknown-linux-gnu -S | FileCheck %s --check-prefixes=CHECK,AARCH64 %}
define i1 @logical_and_icmp_
diff _preds(<4 x i32> %x) {
; CHECK-LABEL: @logical_and_icmp_
diff _preds(
@@ -28,14 +28,23 @@ define i1 @logical_and_icmp_
diff _preds(<4 x i32> %x) {
}
define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
-; CHECK-LABEL: @logical_and_icmp_clamp(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
-; CHECK-NEXT: ret i1 [[TMP6]]
+; X86-LABEL: @logical_and_icmp_clamp(
+; X86-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
+; X86-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
+; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; X86-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP3]], <4 x i1> [[TMP1]], i64 4)
+; X86-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
+; X86-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
+; X86-NEXT: ret i1 [[TMP6]]
+;
+; AARCH64-LABEL: @logical_and_icmp_clamp(
+; AARCH64-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; AARCH64-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
+; AARCH64-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
+; AARCH64-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; AARCH64-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
+; AARCH64-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
+; AARCH64-NEXT: ret i1 [[TMP6]]
;
%x0 = extractelement <4 x i32> %x, i32 0
%x1 = extractelement <4 x i32> %x, i32 1
More information about the llvm-commits
mailing list