[llvm] 6704faf - [SLP] Use StoreTy to compute min VF.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 13 03:13:03 PDT 2024


Author: Florian Hahn
Date: 2024-04-13T11:12:33+01:00
New Revision: 6704faf6f8a26c21c71fd0efad6af6a8d6ee52b4

URL: https://github.com/llvm/llvm-project/commit/6704faf6f8a26c21c71fd0efad6af6a8d6ee52b4
DIFF: https://github.com/llvm/llvm-project/commit/6704faf6f8a26c21c71fd0efad6af6a8d6ee52b4.diff

LOG: [SLP] Use StoreTy to compute min VF.

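This ensures that MinVF is a power of 2, even if the bit width of ValueTy
is not a power of 2 (e.g. when the stored value is a trunc from i24, where
ValueTy is the trunc's source type).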

This should fix a number of buildbot failures with X86 bootstrapping.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 09db50132c892e..b031b40a978f5d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15137,7 +15137,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
       if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
         ValueTy = Trunc->getSrcTy();
       unsigned MinVF = TTI->getStoreMinimumVF(
-          R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
+          R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy);
 
       if (MaxVF < MinVF) {
         LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll
new file mode 100644
index 00000000000000..81b4ee40e7fdf3
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -p slp-vectorizer -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test_2_i24_stores(ptr %A) {
+; CHECK-LABEL: define void @test_2_i24_stores(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[L:%.*]] = load i24, ptr [[A]], align 4
+; CHECK-NEXT:    store i24 [[L]], ptr [[A]], align 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i24, ptr [[A]], i64 1
+; CHECK-NEXT:    store i24 0, ptr [[GEP]], align 1
+; CHECK-NEXT:    ret void
+;
+  %l = load i24, ptr %A
+  store i24 %l, ptr %A, align 1
+  %gep = getelementptr i24, ptr %A, i64 1
+  store i24 0, ptr %gep, align 1
+  ret void
+}
+
+define void @test_2_trunc_i24_to_i8(i24 %x, ptr %A) {
+; CHECK-LABEL: define void @test_2_trunc_i24_to_i8(
+; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = trunc i24 [[X]] to i8
+; CHECK-NEXT:    store i8 [[T]], ptr [[A]], align 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 1
+; CHECK-NEXT:    store i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT:    ret void
+;
+  %t = trunc i24 %x to i8
+  store i8 %t, ptr %A, align 1
+  %gep = getelementptr i8, ptr %A, i64 1
+  store i8 0, ptr %gep, align 1
+  ret void
+}
+
+define void @test_4_trunc_i24_to_i8(i24 %x, ptr %A) {
+; CHECK-LABEL: define void @test_4_trunc_i24_to_i8(
+; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = trunc i24 [[X]] to i8
+; CHECK-NEXT:    store i8 [[T]], ptr [[A]], align 1
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i8, ptr [[A]], i64 1
+; CHECK-NEXT:    store i8 [[T]], ptr [[GEP_1]], align 1
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr i8, ptr [[A]], i64 2
+; CHECK-NEXT:    store i8 [[T]], ptr [[GEP_2]], align 1
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr i8, ptr [[A]], i64 3
+; CHECK-NEXT:    store i8 [[T]], ptr [[GEP_3]], align 1
+; CHECK-NEXT:    ret void
+;
+  %t = trunc i24 %x to i8
+  store i8 %t, ptr %A, align 1
+  %gep.1 = getelementptr i8, ptr %A, i64 1
+  store i8 %t, ptr %gep.1, align 1
+  %gep.2 = getelementptr i8, ptr %A, i64 2
+  store i8 %t, ptr %gep.2, align 1
+  %gep.3 = getelementptr i8, ptr %A, i64 3
+  store i8 %t, ptr %gep.3, align 1
+  ret void
+}
+
+define void @test_8_trunc_i24_to_i8(i24 %x, ptr %A) {
+; CHECK-LABEL: define void @test_8_trunc_i24_to_i8(
+; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = trunc i24 [[X]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[T]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    store <8 x i8> [[TMP2]], ptr [[A]], align 1
+; CHECK-NEXT:    ret void
+;
+  %t = trunc i24 %x to i8
+  store i8 %t, ptr %A, align 1
+  %gep.1 = getelementptr i8, ptr %A, i64 1
+  store i8 %t, ptr %gep.1, align 1
+  %gep.2 = getelementptr i8, ptr %A, i64 2
+  store i8 %t, ptr %gep.2, align 1
+  %gep.3 = getelementptr i8, ptr %A, i64 3
+  store i8 %t, ptr %gep.3, align 1
+  %gep.4 = getelementptr i8, ptr %A, i64 4
+  store i8 %t, ptr %gep.4, align 1
+  %gep.5 = getelementptr i8, ptr %A, i64 5
+  store i8 %t, ptr %gep.5, align 1
+  %gep.6 = getelementptr i8, ptr %A, i64 6
+  store i8 %t, ptr %gep.6, align 1
+  %gep.7 = getelementptr i8, ptr %A, i64 7
+  store i8 %t, ptr %gep.7, align 1
+  ret void
+}
+
+define void @test_4_trunc_i24_to_i16(i24 %x, ptr %A) {
+; CHECK-LABEL: define void @test_4_trunc_i24_to_i16(
+; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
+; CHECK-NEXT:    [[T:%.*]] = trunc i24 [[X]] to i16
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    store <4 x i16> [[TMP2]], ptr [[A]], align 1
+; CHECK-NEXT:    ret void
+;
+  %t = trunc i24 %x to i16
+  store i16 %t, ptr %A, align 1
+  %gep.1 = getelementptr i16, ptr %A, i64 1
+  store i16 %t, ptr %gep.1, align 1
+  %gep.2 = getelementptr i16, ptr %A, i64 2
+  store i16 %t, ptr %gep.2, align 1
+  %gep.3 = getelementptr i16, ptr %A, i64 3
+  store i16 %t, ptr %gep.3, align 1
+  ret void
+}


        


More information about the llvm-commits mailing list