[llvm] f413772 - [SLP]Fix last instruction selection for vectorized last instruction in SplitVectorize nodes

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 7 09:27:19 PDT 2025


Author: Alexey Bataev
Date: 2025-04-07T09:27:08-07:00
New Revision: f413772b318c2da4dfc488df81cb3c458606301f

URL: https://github.com/llvm/llvm-project/commit/f413772b318c2da4dfc488df81cb3c458606301f
DIFF: https://github.com/llvm/llvm-project/commit/f413772b318c2da4dfc488df81cb3c458606301f.diff

LOG: [SLP]Fix last instruction selection for vectorized last instruction in SplitVectorize nodes

If the last instruction in the SplitVectorize node is itself vectorized and
scheduled as part of some bundle, the SplitVectorize node might be
placed in the wrong order, leading to a compiler crash. We need to check
whether the vectorized node already has a vector value and, if so, place
the SplitVectorize node after that vector instruction to prevent the
compiler crash.

Fixes issue reported in https://github.com/llvm/llvm-project/pull/133091#issuecomment-2782826805

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/split-node-last-inst-vectorized.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e2031df810573..e9ba944924837 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15412,12 +15412,20 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
 
   if (E->State == TreeEntry::SplitVectorize) {
     Res = FindLastInst();
+    if (ArrayRef<TreeEntry *> Entries = getTreeEntries(Res); !Entries.empty()) {
+      for (auto *E : Entries) {
+        auto *I = dyn_cast_or_null<Instruction>(E->VectorizedValue);
+        if (!I)
+          I = &getLastInstructionInBundle(E);
+        if (Res->comesBefore(I))
+          Res = I;
+      }
+    }
     return *Res;
   }
 
   // Set insertpoint for gathered loads to the very first load.
-  if (E->State != TreeEntry::SplitVectorize &&
-      GatheredLoadsEntriesFirst.has_value() &&
+  if (GatheredLoadsEntriesFirst.has_value() &&
       E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
       E->getOpcode() == Instruction::Load) {
     Res = FindFirstInst();

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-last-inst-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-last-inst-vectorized.ll
new file mode 100644
index 0000000000000..c3da2aad4c869
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-last-inst-vectorized.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(ptr %0, <8 x i8> %1) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[TMP0:%.*]], <8 x i8> [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 13436
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 13536
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[TMP0]], i64 13437
+; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i8>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 5, i32 0, i32 7>
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x i8> [[TMP7]], i8 [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i8> [[TMP9]], <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i8> [[TMP8]], <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP11]], <8 x i8> [[TMP10]], i64 8)
+; CHECK-NEXT:    [[TMP13:%.*]] = load <8 x i8>, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> poison, <8 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP7]], i64 0)
+; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP15]], <8 x i8> [[TMP14]], i64 8)
+; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i8> [[TMP16]], [[TMP12]]
+; CHECK-NEXT:    store <16 x i8> [[TMP17]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    ret void
+;
+  %3 = load i8, ptr %0, align 2
+  %4 = getelementptr i8, ptr %0, i64 13442
+  %5 = load i8, ptr %4, align 2
+  %6 = or i8 %5, %3
+  %7 = getelementptr i8, ptr %0, i64 13550
+  store i8 %6, ptr %7, align 2
+  %8 = extractelement <8 x i8> %1, i64 0
+  %9 = or i8 %5, %8
+  %10 = getelementptr i8, ptr %0, i64 13542
+  store i8 %9, ptr %10, align 2
+  %11 = getelementptr i8, ptr %0, i64 13438
+  %12 = load i8, ptr %11, align 2
+  %13 = or i8 %12, %3
+  %14 = getelementptr i8, ptr %0, i64 13546
+  store i8 %13, ptr %14, align 2
+  %15 = extractelement <8 x i8> %1, i64 2
+  %16 = or i8 %12, %15
+  %17 = getelementptr i8, ptr %0, i64 13538
+  store i8 %16, ptr %17, align 2
+  %18 = getelementptr i8, ptr %0, i64 13440
+  %19 = load i8, ptr %18, align 4
+  %20 = or i8 %19, %3
+  %21 = getelementptr i8, ptr %0, i64 13548
+  store i8 %20, ptr %21, align 4
+  %22 = extractelement <8 x i8> %1, i64 4
+  %23 = or i8 %19, %22
+  %24 = getelementptr i8, ptr %0, i64 13540
+  store i8 %23, ptr %24, align 4
+  %25 = getelementptr i8, ptr %0, i64 13436
+  %26 = load i8, ptr %25, align 4
+  %27 = getelementptr i8, ptr %0, i64 13444
+  %28 = load i8, ptr %27, align 4
+  %29 = or i8 %28, %26
+  %30 = getelementptr i8, ptr %0, i64 13544
+  store i8 %29, ptr %30, align 4
+  %31 = or i8 %26, %8
+  %32 = getelementptr i8, ptr %0, i64 13536
+  store i8 %31, ptr %32, align 4
+  %33 = getelementptr i8, ptr %0, i64 13443
+  %34 = load i8, ptr %33, align 1
+  %35 = or i8 %34, %3
+  %36 = getelementptr i8, ptr %0, i64 13551
+  store i8 %35, ptr %36, align 1
+  %37 = extractelement <8 x i8> %1, i64 7
+  %38 = or i8 %34, %37
+  %39 = getelementptr i8, ptr %0, i64 13543
+  store i8 %38, ptr %39, align 1
+  %40 = getelementptr i8, ptr %0, i64 13439
+  %41 = load i8, ptr %40, align 1
+  %42 = or i8 %41, %3
+  %43 = getelementptr i8, ptr %0, i64 13547
+  store i8 %42, ptr %43, align 1
+  %44 = extractelement <8 x i8> %1, i64 3
+  %45 = or i8 %41, %44
+  %46 = getelementptr i8, ptr %0, i64 13539
+  store i8 %45, ptr %46, align 1
+  %47 = getelementptr i8, ptr %0, i64 13441
+  %48 = load i8, ptr %47, align 1
+  %49 = or i8 %48, %3
+  %50 = getelementptr i8, ptr %0, i64 13549
+  store i8 %49, ptr %50, align 1
+  %51 = extractelement <8 x i8> %1, i64 5
+  %52 = or i8 %48, %51
+  %53 = getelementptr i8, ptr %0, i64 13541
+  store i8 %52, ptr %53, align 1
+  %54 = getelementptr i8, ptr %0, i64 13437
+  %55 = load i8, ptr %54, align 1
+  %56 = or i8 %55, %3
+  %57 = getelementptr i8, ptr %0, i64 13545
+  store i8 %56, ptr %57, align 1
+  %58 = or i8 %55, %8
+  %59 = getelementptr i8, ptr %0, i64 13537
+  store i8 %58, ptr %59, align 1
+  ret void
+}


        


More information about the llvm-commits mailing list