[llvm] 0c18def - [SLP]Allow interleaving check only if it is less than number of elements
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 07:06:26 PST 2024
Author: Alexey Bataev
Date: 2024-11-05T07:06:15-08:00
New Revision: 0c18def2c18c4b99a5f448496461b60f576992fa
URL: https://github.com/llvm/llvm-project/commit/0c18def2c18c4b99a5f448496461b60f576992fa
DIFF: https://github.com/llvm/llvm-project/commit/0c18def2c18c4b99a5f448496461b60f576992fa.diff
LOG: [SLP]Allow interleaving check only if it is less than number of elements
Need to check that the interleaving factor does not exceed the total number of
elements in the loads slice to handle it correctly and avoid a compiler crash.
Fixes report https://github.com/llvm/llvm-project/pull/112361#issuecomment-2457227670
Added:
llvm/test/Transforms/SLPVectorizer/RISCV/interleave-greater-than-slice.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3c6daf7b9fbb90..4454eb3e34d983 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7046,7 +7046,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
OrdersType Order;
SmallVector<Value *> PointerOps;
// Segmented load detected - vectorize at maximum vector factor.
- if (TTI.isLegalInterleavedAccessType(
+ if (InterleaveFactor <= Slice.size() &&
+ TTI.isLegalInterleavedAccessType(
getWidenedType(Slice.front()->getType(), VF),
InterleaveFactor,
cast<LoadInst>(Slice.front())->getAlign(),
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/interleave-greater-than-slice.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/interleave-greater-than-slice.ll
new file mode 100644
index 00000000000000..4a8b8517861d57
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/interleave-greater-than-slice.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v,+zvl128b < %s | FileCheck %s
+
+define void @test(ptr %a, float %0) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[A:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[TMP1]], i64 84
+; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP2]], float 0.000000e+00, float 0.000000e+00)
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 28
+; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP4]], float 0.000000e+00, float [[TMP3]])
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP6]], float 0.000000e+00, float 0.000000e+00)
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 68
+; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP8]], float 0.000000e+00, float [[TMP5]])
+; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 88
+; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP10]], float 0.000000e+00, float [[TMP7]])
+; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 92
+; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP12]], float 0.000000e+00, float [[TMP11]])
+; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float [[TMP9]])
+; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
+; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
+; CHECK-NEXT: [[TMP16:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP15]], float 0.000000e+00, float [[TMP13]])
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr i8, ptr [[TMP1]], i64 80
+; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP17]], float [[TMP16]])
+; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 100
+; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP19]], float 0.000000e+00, float [[TMP14]])
+; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP18]], [[TMP20]]
+; CHECK-NEXT: store float [[ADD]], ptr [[A]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = load ptr, ptr %a, align 8
+ %arrayidx = getelementptr i8, ptr %1, i64 84
+ %2 = load float, ptr %arrayidx, align 4
+ %3 = tail call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float 0.000000e+00)
+ %arrayidx1 = getelementptr i8, ptr %1, i64 28
+ %4 = load float, ptr %arrayidx1, align 4
+ %5 = tail call float @llvm.fmuladd.f32(float %4, float 0.000000e+00, float %3)
+ %arrayidx2 = getelementptr i8, ptr %1, i64 8
+ %6 = load float, ptr %arrayidx2, align 4
+ %7 = tail call float @llvm.fmuladd.f32(float %6, float 0.000000e+00, float 0.000000e+00)
+ %arrayidx3 = getelementptr i8, ptr %1, i64 68
+ %8 = load float, ptr %arrayidx3, align 4
+ %9 = tail call float @llvm.fmuladd.f32(float %8, float 0.000000e+00, float %5)
+ %arrayidx4 = getelementptr i8, ptr %1, i64 88
+ %10 = load float, ptr %arrayidx4, align 4
+ %11 = tail call float @llvm.fmuladd.f32(float %10, float 0.000000e+00, float %7)
+ %arrayidx5 = getelementptr i8, ptr %1, i64 92
+ %12 = load float, ptr %arrayidx5, align 4
+ %13 = tail call float @llvm.fmuladd.f32(float %12, float 0.000000e+00, float %11)
+ %14 = tail call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float %9)
+ %arrayidx6 = getelementptr i8, ptr %1, i64 96
+ %15 = load float, ptr %arrayidx6, align 4
+ %16 = tail call float @llvm.fmuladd.f32(float %15, float 0.000000e+00, float %13)
+ %arrayidx7 = getelementptr i8, ptr %1, i64 80
+ %17 = load float, ptr %arrayidx7, align 4
+ %18 = tail call float @llvm.fmuladd.f32(float %0, float %17, float %16)
+ %arrayidx8 = getelementptr i8, ptr %1, i64 100
+ %19 = load float, ptr %arrayidx8, align 4
+ %20 = tail call float @llvm.fmuladd.f32(float %19, float 0.000000e+00, float %14)
+ %add = fadd float %18, %20
+ store float %add, ptr %a, align 4
+ ret void
+}
More information about the llvm-commits
mailing list