[llvm] [SROA]: Only defer trying partial sized ptr or ptr vector types (PR #82279)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 19 13:42:31 PST 2024


https://github.com/jrbyrnes created https://github.com/llvm/llvm-project/pull/82279

https://github.com/llvm/llvm-project/pull/77678 introduced regressions of the following type: LoadStoreTys had the better candidate type, but it was not selected since considering LoadStoreTys was deferred until after full consideration of CandidateTys (those that are the same size as the partition).

See: https://godbolt.org/z/G5sxbf1sx

>From 5e628b374d78e4319995e17b0b4c40aee79ccb00 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 19 Feb 2024 12:59:13 -0800
Subject: [PATCH 1/2] [SROA] NFC: Extract common code to
 createAndCheckVectorTypesForPromotion

Change-Id: If6598c7869e30d9013b2d631e4e08a6efcce044c
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 51 ++++++++++++++++-------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 6c8785d52c4eab..f1b5575fe772b8 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2271,6 +2271,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;
   SetVector<Type *> LoadStoreTys;
+  SetVector<Type *> DeferredTys;
   Type *CommonEltTy = nullptr;
   VectorType *CommonVecPtrTy = nullptr;
   bool HaveVecPtrTy = false;
@@ -2305,35 +2306,17 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
     }
   };
 
-  // Put load and store types into a set for de-duplication.
-  for (const Slice &S : P) {
-    Type *Ty;
-    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
-      Ty = LI->getType();
-    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
-      Ty = SI->getValueOperand()->getType();
-    else
-      continue;
-    LoadStoreTys.insert(Ty);
-    // Consider any loads or stores that are the exact size of the slice.
-    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
-      CheckCandidateType(Ty);
-  }
-
-  if (auto *VTy = checkVectorTypesForPromotion(
-          P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
-          HaveCommonVecPtrTy, CommonVecPtrTy))
-    return VTy;
-
+  auto createAndCheckVectorTypesForPromotion = [&](SetVector<Type *> OtherTys) {
   // Consider additional vector types where the element type size is a
   // multiple of load/store element size.
-  for (Type *Ty : LoadStoreTys) {
+  for (Type *Ty : OtherTys) {
     if (!VectorType::isValidElementType(Ty))
       continue;
     unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
     // Make a copy of CandidateTys and iterate through it, because we might
     // append to CandidateTys in the loop.
-    SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+    SmallVector<VectorType *, 4> CandidateTysCopy;
+    CandidateTysCopy.assign(CandidateTys);
     CandidateTys.clear();
     for (VectorType *&VTy : CandidateTysCopy) {
       unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
@@ -2350,8 +2333,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
                                       CommonEltTy, HaveVecPtrTy,
                                       HaveCommonVecPtrTy, CommonVecPtrTy);
+  };
+
+  // Put load and store types into a set for de-duplication.
+  for (const Slice &S : P) {
+    Type *Ty;
+    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
+      Ty = LI->getType();
+    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
+      Ty = SI->getValueOperand()->getType();
+    else
+      continue;
+    LoadStoreTys.insert(Ty);
+    // Consider any loads or stores that are the exact size of the slice.
+    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
+      CheckCandidateType(Ty);
+  }
+
+  if (auto *VTy = checkVectorTypesForPromotion(
+          P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+          HaveCommonVecPtrTy, CommonVecPtrTy))
+    return VTy;
+
+  return createAndCheckVectorTypesForPromotion(LoadStoreTys);
 }
 
+
 /// Test whether a slice of an alloca is valid for integer widening.
 ///
 /// This implements the necessary checking for the \c isIntegerWideningViable

>From 02e746474ba04df8149c445a25f5069ca9995183 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 19 Feb 2024 13:28:32 -0800
Subject: [PATCH 2/2] [SROA]: Only defer trying partial sized ptr or ptr vector
 types

Change-Id: Ic9cc79aa66890ef6e23471f0ab3829aad5041558
---
 llvm/lib/Transforms/Scalar/SROA.cpp           | 72 +++++++++++--------
 llvm/test/Transforms/SROA/vector-promotion.ll | 32 +++++++++
 2 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index f1b5575fe772b8..b12d156e067edd 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2306,34 +2306,34 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
     }
   };
 
-  auto createAndCheckVectorTypesForPromotion = [&](SetVector<Type *> OtherTys) {
-  // Consider additional vector types where the element type size is a
-  // multiple of load/store element size.
-  for (Type *Ty : OtherTys) {
-    if (!VectorType::isValidElementType(Ty))
-      continue;
-    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
-    // Make a copy of CandidateTys and iterate through it, because we might
-    // append to CandidateTys in the loop.
-    SmallVector<VectorType *, 4> CandidateTysCopy;
-    CandidateTysCopy.assign(CandidateTys);
-    CandidateTys.clear();
-    for (VectorType *&VTy : CandidateTysCopy) {
-      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
-      unsigned ElementSize =
-          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
-      if (TypeSize != VectorSize && TypeSize != ElementSize &&
-          VectorSize % TypeSize == 0) {
-        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
-        CheckCandidateType(NewVTy);
-      }
-    }
-  }
+  auto createAndCheckVectorTypesForPromotion =
+      [&](SetVector<Type *> OtherTys,
+          SmallVector<VectorType *, 4> CandidateTysCopy) {
+        // Consider additional vector types where the element type size is a
+        // multiple of load/store element size.
+        for (Type *Ty : OtherTys) {
+          if (!VectorType::isValidElementType(Ty))
+            continue;
+          unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+          // Make a copy of CandidateTys and iterate through it, because we
+          // might append to CandidateTys in the loop.
+          for (VectorType *&VTy : CandidateTysCopy) {
+            unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+            unsigned ElementSize =
+                DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+            if (TypeSize != VectorSize && TypeSize != ElementSize &&
+                VectorSize % TypeSize == 0) {
+              VectorType *NewVTy =
+                  VectorType::get(Ty, VectorSize / TypeSize, false);
+              CheckCandidateType(NewVTy);
+            }
+          }
+        }
 
-  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
-                                      CommonEltTy, HaveVecPtrTy,
-                                      HaveCommonVecPtrTy, CommonVecPtrTy);
-  };
+        return checkVectorTypesForPromotion(
+            P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+            HaveCommonVecPtrTy, CommonVecPtrTy);
+      };
 
   // Put load and store types into a set for de-duplication.
   for (const Slice &S : P) {
@@ -2344,18 +2344,28 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       Ty = SI->getValueOperand()->getType();
     else
       continue;
+
+    auto CandTy =
+        isa<VectorType>(Ty) ? cast<VectorType>(Ty)->getElementType() : Ty;
+    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
+                                  S.endOffset() != P.endOffset())) {
+      DeferredTys.insert(Ty);
+      continue;
+    }
+
     LoadStoreTys.insert(Ty);
     // Consider any loads or stores that are the exact size of the slice.
     if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
       CheckCandidateType(Ty);
   }
 
-  if (auto *VTy = checkVectorTypesForPromotion(
-          P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
-          HaveCommonVecPtrTy, CommonVecPtrTy))
+  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+  if (auto *VTy =
+          createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy))
     return VTy;
 
-  return createAndCheckVectorTypesForPromotion(LoadStoreTys);
+  CandidateTys.clear();
+  return createAndCheckVectorTypesForPromotion(DeferredTys, CandidateTysCopy);
 }
 
 
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index e48dd5bb392082..dc70520fc5ca70 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -1392,6 +1392,38 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
   ret <4 x ptr> %sroaval
 }
 
+define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
+; CHECK-LABEL: @validLoadStoreTy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
+; CHECK-NEXT:    [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
+; CHECK-NEXT:    [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
+; CHECK-NEXT:    [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+;
+; DEBUG-LABEL: @validLoadStoreTy(
+; DEBUG-NEXT:  entry:
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
+; DEBUG-NEXT:    [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
+; DEBUG-NEXT:    [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
+; DEBUG-NEXT:    [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
+; DEBUG-NEXT:    [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
+; DEBUG-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
+; DEBUG-NEXT:    ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
+;
+entry:
+  %cond = alloca <4 x i32>, align 8
+  %coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+  store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
+  %m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+  %0 = load <4 x i32>, ptr %m5, align 8
+  ret <4 x i32> %0
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 declare void @llvm.lifetime.end.p0(i64, ptr)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:



More information about the llvm-commits mailing list