[llvm] 48a4b14 - [SLP]Fix whole vector registers calculations for compares
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 2 07:26:51 PDT 2025
Author: Alexey Bataev
Date: 2025-04-02T07:26:40-07:00
New Revision: 48a4b14cb65d56ec3bbb473887c31d251092c83c
URL: https://github.com/llvm/llvm-project/commit/48a4b14cb65d56ec3bbb473887c31d251092c83c
DIFF: https://github.com/llvm/llvm-project/commit/48a4b14cb65d56ec3bbb473887c31d251092c83c.diff
LOG: [SLP]Fix whole vector registers calculations for compares
The calculated number of elements must be capped so that it is not larger
than the original number of scalars; otherwise the compiler crashes.
Fixes #134013
Added:
llvm/test/Transforms/SLPVectorizer/X86/whole-registers-compare.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f9284b3a5f219..838e952c024c0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8873,6 +8873,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Find the number of elements, which forms full vectors.
unsigned PWSz = getFullVectorNumberOfElements(
*TTI, UniqueValues.front()->getType(), UniqueValues.size());
+ PWSz = std::min<unsigned>(PWSz, VL.size());
if (PWSz == VL.size()) {
ReuseShuffleIndices.clear();
} else {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/whole-registers-compare.ll b/llvm/test/Transforms/SLPVectorizer/X86/whole-registers-compare.ll
new file mode 100644
index 0000000000000..5e52bd7cc954c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/whole-registers-compare.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: define void @test() {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[DO_BODY:.*]]
+; CHECK: [[DO_BODY]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <12 x float> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP7:%.*]], %[[DO_BODY]] ]
+; CHECK-NEXT: [[CMP119:%.*]] = fcmp uge float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[VAL_SROA_6_1:%.*]] = select i1 [[CMP119]], float 0.000000e+00, float 0.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[VAL_SROA_6_1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp uge <2 x float> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <12 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <12 x i1> <i1 true, i1 undef, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 undef>, <12 x i1> [[TMP3]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 12, i32 13, i32 11>
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <12 x i1> [[TMP4]], i1 [[CMP119]], i32 11
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <12 x i1> [[TMP5]], <12 x i1> poison, <12 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP7]] = select <12 x i1> [[TMP6]], <12 x float> zeroinitializer, <12 x float> zeroinitializer
+; CHECK-NEXT: br label %[[DO_BODY]]
+;
+entry:
+ br label %do.body
+
+do.body:
+ %val.sroa.66.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.66.1, %do.body ]
+ %val.sroa.60.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.60.2, %do.body ]
+ %val.sroa.54.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.54.2, %do.body ]
+ %val.sroa.48.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.48.2, %do.body ]
+ %val.sroa.42.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.42.2, %do.body ]
+ %val.sroa.36.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.36.2, %do.body ]
+ %val.sroa.30.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.30.2, %do.body ]
+ %val.sroa.24.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.24.2, %do.body ]
+ %val.sroa.18.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.18.2, %do.body ]
+ %val.sroa.12.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.12.2, %do.body ]
+ %val.sroa.6.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.6.2, %do.body ]
+ %val.sroa.0.0 = phi float [ 0.000000e+00, %entry ], [ %val.sroa.0.1, %do.body ]
+ %cmp119 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.6.1 = select i1 %cmp119, float 0.000000e+00, float 0.000000e+00
+ %val.sroa.0.1 = select i1 %cmp119, float 0.000000e+00, float 0.000000e+00
+ %cmp119.1 = fcmp uge float %val.sroa.6.1, 0.000000e+00
+ %val.sroa.6.2 = select i1 %cmp119.1, float 0.000000e+00, float 0.000000e+00
+ %cmp119.2 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.12.2 = select i1 %cmp119.2, float 0.000000e+00, float 0.000000e+00
+ %cmp119.3 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.18.2 = select i1 %cmp119.3, float 0.000000e+00, float 0.000000e+00
+ %cmp119.4 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.24.2 = select i1 %cmp119.4, float 0.000000e+00, float 0.000000e+00
+ %cmp119.5 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.30.2 = select i1 %cmp119.5, float 0.000000e+00, float 0.000000e+00
+ %cmp119.6 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.36.2 = select i1 %cmp119.6, float 0.000000e+00, float 0.000000e+00
+ %cmp119.7 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.42.2 = select i1 %cmp119.7, float 0.000000e+00, float 0.000000e+00
+ %cmp119.8 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.48.2 = select i1 %cmp119.8, float 0.000000e+00, float 0.000000e+00
+ %cmp119.9 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.54.2 = select i1 %cmp119.9, float 0.000000e+00, float 0.000000e+00
+ %cmp119.10 = fcmp uge float 0.000000e+00, 0.000000e+00
+ %val.sroa.66.1 = select i1 %cmp119.10, float 0.000000e+00, float 0.000000e+00
+ %val.sroa.60.2 = select i1 %cmp119.10, float 0.000000e+00, float 0.000000e+00
+ br label %do.body
+}
More information about the llvm-commits mailing list