[llvm] 26f944b - [SLP]Fix an ArrayRef out-of-bounds access in slice
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 17 10:00:30 PST 2026
Author: Alexey Bataev
Date: 2026-02-17T10:00:13-08:00
New Revision: 26f944bb50b0f585ba1be805368a987ad5543a07
URL: https://github.com/llvm/llvm-project/commit/26f944bb50b0f585ba1be805368a987ad5543a07
DIFF: https://github.com/llvm/llvm-project/commit/26f944bb50b0f585ba1be805368a987ad5543a07.diff
LOG: [SLP]Fix an ArrayRef out-of-bounds access in slice
When revec is enabled, the number of parts (registers) may be the one
computed for the combined node rather than for a single-element node, so
the slice bounds must be checked to avoid a potential out-of-bounds access.
Fixes #181798
Added:
llvm/test/Transforms/SLPVectorizer/AArch64/revec-non-pow2.ll
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d97faa59a483f..afc2ffb71e1c6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17846,8 +17846,13 @@ BoUpSLP::tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
for (unsigned Part : seq<unsigned>(NumParts)) {
// Scan list of gathered scalars for extractelements that can be represented
// as shuffles.
- MutableArrayRef<Value *> SubVL = MutableArrayRef(VL).slice(
- Part * SliceSize, getNumElems(VL.size(), SliceSize, Part));
+ const unsigned PartOffset = Part * SliceSize;
+ const unsigned PartSize = getNumElems(VL.size(), SliceSize, Part);
+ // It may happen in case of revec, need to check no access out of bounds.
+ if (PartOffset + PartSize > VL.size())
+ break;
+ MutableArrayRef<Value *> SubVL =
+ MutableArrayRef(VL).slice(PartOffset, PartSize);
SmallVector<int> SubMask;
std::optional<TTI::ShuffleKind> Res =
tryToGatherSingleRegisterExtractElements(SubVL, SubMask);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/revec-non-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/revec-non-pow2.ll
new file mode 100644
index 0000000000000..1355224e94b71
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/revec-non-pow2.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -slp-revec -slp-vectorize-non-power-of-2 --passes=slp-vectorizer -S -mtriple=aarch64-pc-windows-gnu < %s | FileCheck %s
+
+define i32 @test(ptr %0) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[_SPLIT:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 108
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 120
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 132
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP1]], i64 144
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP1]], i64 156
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 168
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i64 180
+; CHECK-NEXT: [[TMP10:%.*]] = load <3 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = load <3 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = add <3 x i32> [[TMP11]], <i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[TMP13:%.*]] = add <3 x i32> [[TMP10]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = load <3 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = add <3 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = load <3 x i32>, ptr [[TMP5]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = add <3 x i32> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = load <3 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = add <3 x i32> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = load <3 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = add <3 x i32> [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = load <3 x i32>, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[TMP23:%.*]] = add <3 x i32> [[TMP21]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = load <3 x i32>, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[TMP25:%.*]] = add <3 x i32> [[TMP23]], [[TMP24]]
+; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt <3 x i32> [[TMP25]], zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = bitcast <3 x i1> [[TMP26]] to i3
+; CHECK-NEXT: [[TMP28:%.*]] = call i3 @llvm.ctpop.i3(i3 [[TMP27]])
+; CHECK-NEXT: [[TMP29:%.*]] = zext i3 [[TMP28]] to i32
+; CHECK-NEXT: ret i32 [[TMP29]]
+;
+.split:
+ %1 = load ptr, ptr %0, align 8
+ %2 = getelementptr i8, ptr %1, i64 96
+ %3 = load i32, ptr %2, align 4
+ %4 = add i32 %3, 1
+ %5 = getelementptr i8, ptr %1, i64 108
+ %6 = load i32, ptr %5, align 4
+ %7 = add i32 %4, %6
+ %8 = getelementptr i8, ptr %1, i64 120
+ %9 = load i32, ptr %8, align 4
+ %10 = add i32 %7, %9
+ %11 = getelementptr i8, ptr %1, i64 132
+ %12 = load i32, ptr %11, align 4
+ %13 = add i32 %10, %12
+ %14 = getelementptr i8, ptr %1, i64 144
+ %15 = load i32, ptr %14, align 4
+ %16 = add i32 %13, %15
+ %17 = getelementptr i8, ptr %1, i64 156
+ %18 = load i32, ptr %17, align 4
+ %19 = add i32 %16, %18
+ %20 = getelementptr i8, ptr %1, i64 168
+ %21 = load i32, ptr %20, align 4
+ %22 = add i32 %19, %21
+ %23 = getelementptr i8, ptr %1, i64 180
+ %24 = load i32, ptr %23, align 4
+ %25 = add i32 %22, %24
+ %26 = getelementptr i8, ptr %1, i64 100
+ %27 = load i32, ptr %26, align 4
+ %28 = add i32 %27, 1
+ %29 = getelementptr i8, ptr %1, i64 112
+ %30 = load i32, ptr %29, align 4
+ %31 = add i32 %28, %30
+ %32 = getelementptr i8, ptr %1, i64 124
+ %33 = load i32, ptr %32, align 4
+ %34 = add i32 %31, %33
+ %35 = getelementptr i8, ptr %1, i64 136
+ %36 = load i32, ptr %35, align 4
+ %37 = add i32 %34, %36
+ %38 = getelementptr i8, ptr %1, i64 148
+ %39 = load i32, ptr %38, align 4
+ %40 = add i32 %37, %39
+ %41 = getelementptr i8, ptr %1, i64 160
+ %42 = load i32, ptr %41, align 4
+ %43 = getelementptr i8, ptr %1, i64 172
+ %44 = getelementptr i8, ptr %1, i64 116
+ %45 = load i32, ptr %44, align 4
+ %46 = getelementptr i8, ptr %1, i64 104
+ %47 = load i32, ptr %46, align 4
+ %48 = add i32 %45, %47
+ %49 = getelementptr i8, ptr %1, i64 128
+ %50 = load i32, ptr %49, align 4
+ %51 = add i32 %48, %50
+ %52 = getelementptr i8, ptr %1, i64 140
+ %53 = load i32, ptr %52, align 4
+ %54 = add i32 %51, %53
+ %55 = getelementptr i8, ptr %1, i64 152
+ %56 = load i32, ptr %55, align 4
+ %57 = add i32 %54, %56
+ %58 = getelementptr i8, ptr %1, i64 164
+ %59 = load i32, ptr %58, align 4
+ %60 = add i32 %57, %59
+ %61 = getelementptr i8, ptr %1, i64 176
+ %62 = load i32, ptr %61, align 4
+ %63 = add i32 %60, %62
+ %64 = getelementptr i8, ptr %1, i64 188
+ %65 = load i32, ptr %64, align 4
+ %66 = add i32 %63, %65
+ %67 = add i32 %40, %42
+ %68 = load i32, ptr %43, align 4
+ %69 = add i32 %67, %68
+ %70 = getelementptr i8, ptr %1, i64 184
+ %71 = load i32, ptr %70, align 4
+ %72 = add i32 %69, %71
+ %73 = icmp sgt i32 %72, 0
+ %74 = zext i1 %73 to i32
+ %75 = icmp sgt i32 %25, 0
+ %76 = zext i1 %75 to i32
+ %77 = add i32 %74, %76
+ %78 = icmp sgt i32 %66, 0
+ %79 = zext i1 %78 to i32
+ %80 = add i32 %77, %79
+ ret i32 %80
+}
More information about the llvm-commits
mailing list