[llvm] af524de - [SLP]Do not include subvectors for fully matched buildvectors
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 07:24:28 PST 2025
Author: Alexey Bataev
Date: 2025-01-13T07:24:16-08:00
New Revision: af524de1fa94e4b4cee8b745d1b68f4ea0090759
URL: https://github.com/llvm/llvm-project/commit/af524de1fa94e4b4cee8b745d1b68f4ea0090759
DIFF: https://github.com/llvm/llvm-project/commit/af524de1fa94e4b4cee8b745d1b68f4ea0090759.diff
LOG: [SLP]Do not include subvectors for fully matched buildvectors
If the buildvector node fully matches another node, the subvectors must
be excluded when building the final shuffle; only a shuffle of the
original node needs to be emitted.
Fixes #122584
Added:
llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3487b50153422..df46c69ff3ab40 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14935,8 +14935,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
}
}
ShuffleBuilder.add(*FrontTE, Mask);
- Res = ShuffleBuilder.finalize(E->getCommonMask(), SubVectors,
- SubVectorsMask);
+ // Full matched entry found, no need to insert subvectors.
+ Res = ShuffleBuilder.finalize(E->getCommonMask(), {}, {});
return Res;
}
if (!Resized) {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
index f9e415a3cefc13..27f3155b50dbb9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
@@ -259,11 +259,9 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1)
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0)
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12)
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP11]], <16 x i8> [[TMP13]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP13]]
; CHECK-NEXT: store <16 x i8> [[TMP14]], ptr [[PTR]], align 2
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
new file mode 100644
index 00000000000000..7576eb7a8f55ed
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @test(i64 %l.549) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i64 [[L_549:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CONV3:%.*]] = sext i32 0 to i64
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[CONV3]], i32 3
+; CHECK-NEXT: br label %[[IF_THEN19:.*]]
+; CHECK: [[P:.*]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: br i1 false, label %[[S:.*]], label %[[Q:.*]]
+; CHECK: [[Q]]:
+; CHECK-NEXT: [[XOR39:%.*]] = phi i64 [ 0, %[[P]] ], [ 0, %[[LAND_LHS_TRUE:.*]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[P]] ], [ zeroinitializer, %[[LAND_LHS_TRUE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[XOR39]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP4]], <2 x i64> [[TMP3]], i64 0)
+; CHECK-NEXT: br i1 false, label %[[LOR_LHS_FALSE:.*]], label %[[R:.*]]
+; CHECK: [[LOR_LHS_FALSE]]:
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
+; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]]
+; CHECK: [[R]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP5]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
+; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]]
+; CHECK: [[LAND_LHS_TRUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP8]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ]
+; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]]
+; CHECK: [[S]]:
+; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP9]], %[[LAND_LHS_TRUE]] ], [ [[TMP8]], %[[R]] ], [ [[TMP6]], %[[LOR_LHS_FALSE]] ], [ [[TMP2]], %[[P]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: br label %[[IF_THEN19]]
+; CHECK: [[IF_THEN19]]:
+; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP11]], %[[S]] ]
+; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[L_549]], i32 1
+; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> zeroinitializer, i64 2)
+; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]]
+; CHECK: [[IF_END25]]:
+; CHECK-NEXT: br i1 false, label %[[IF_END29]], label %[[P]]
+; CHECK: [[IF_END29]]:
+; CHECK-NEXT: br label %[[P]]
+;
+entry:
+ %conv3 = sext i32 0 to i64
+ br label %if.then19
+
+p:
+ %l.0 = phi i64 [ %xor, %if.end29 ], [ %l.5493, %if.end25 ]
+ %m.0 = phi i64 [ %not21, %if.end29 ], [ %m.550, %if.end25 ]
+ br i1 false, label %s, label %q
+
+q:
+ %xor39 = phi i64 [ 0, %p ], [ 0, %land.lhs.true ]
+ %l.1 = phi i64 [ 0, %p ], [ 0, %land.lhs.true ]
+ %m.1 = phi i64 [ 0, %p ], [ 0, %land.lhs.true ]
+ br i1 false, label %lor.lhs.false, label %r
+
+lor.lhs.false:
+ br i1 false, label %land.lhs.true, label %s
+
+r:
+ %xor38 = phi i64 [ %xor39, %q ], [ %xor, %if.then19 ]
+ %j.0 = phi i64 [ %conv3, %q ], [ %not21, %if.then19 ]
+ %l.2 = phi i64 [ %l.1, %q ], [ %l.549, %if.then19 ]
+ %m.2 = phi i64 [ %m.1, %q ], [ %m.550, %if.then19 ]
+ br i1 false, label %s, label %land.lhs.true
+
+land.lhs.true:
+ %xor37 = phi i64 [ %xor38, %r ], [ 0, %lor.lhs.false ]
+ %j.1 = phi i64 [ %j.0, %r ], [ 0, %lor.lhs.false ]
+ %l.3 = phi i64 [ %l.2, %r ], [ 0, %lor.lhs.false ]
+ %m.3 = phi i64 [ %m.2, %r ], [ 0, %lor.lhs.false ]
+ br i1 false, label %q, label %s
+
+s:
+ %xor36 = phi i64 [ %xor37, %land.lhs.true ], [ %xor38, %r ], [ %xor39, %lor.lhs.false ], [ %l.0, %p ]
+ %j.2 = phi i64 [ %j.1, %land.lhs.true ], [ %j.0, %r ], [ %conv3, %lor.lhs.false ], [ %m.0, %p ]
+ %l.4 = phi i64 [ %l.3, %land.lhs.true ], [ %l.2, %r ], [ %l.1, %lor.lhs.false ], [ %l.0, %p ]
+ %m.4 = phi i64 [ %m.3, %land.lhs.true ], [ %m.2, %r ], [ %m.1, %lor.lhs.false ], [ %m.0, %p ]
+ br label %if.then19
+
+if.then19:
+ %m.550 = phi i64 [ 0, %entry ], [ %m.4, %s ]
+ %l.5493 = phi i64 [ 0, %entry ], [ %l.4, %s ]
+ %xor = xor i64 0, 0
+ %not21 = xor i64 0, 0
+ br i1 false, label %r, label %if.end25
+
+if.end25:
+ br i1 false, label %if.end29, label %p
+
+if.end29:
+ br label %p
+}
+
More information about the llvm-commits
mailing list