[llvm] [SROA]: Only defer trying partial sized ptr or ptr vector types (PR #82279)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 4 10:30:11 PST 2024


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/82279

>From 5ea353115b99d7aa7015d8bca504aa0fd9604cc9 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 19 Feb 2024 12:59:13 -0800
Subject: [PATCH 1/7] [SROA] NFC: Extract common code to
 createAndCheckVectorTypesForPromotion

Change-Id: Iea5d60b12e2de7033fc1a71e80aa96c261e998bf
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 56 ++++++++++++++++-------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 6c8785d52c4eab..7e19efe7f4aaec 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2304,6 +2304,34 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       }
     }
   };
+  auto createAndCheckVectorTypesForPromotion =
+      [&](SetVector<Type *> OtherTys,
+          SmallVector<VectorType *, 4> CandidateTysCopy) {
+        // Consider additional vector types where the element type size is a
+        // multiple of load/store element size.
+        for (Type *Ty : OtherTys) {
+          if (!VectorType::isValidElementType(Ty))
+            continue;
+          unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+          // Make a copy of CandidateTys and iterate through it, because we
+          // might append to CandidateTys in the loop.
+          for (VectorType *&VTy : CandidateTysCopy) {
+            unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+            unsigned ElementSize =
+                DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+            if (TypeSize != VectorSize && TypeSize != ElementSize &&
+                VectorSize % TypeSize == 0) {
+              VectorType *NewVTy =
+                  VectorType::get(Ty, VectorSize / TypeSize, false);
+              CheckCandidateType(NewVTy);
+            }
+          }
+        }
+
+        return checkVectorTypesForPromotion(
+            P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+            HaveCommonVecPtrTy, CommonVecPtrTy);
+      };
 
   // Put load and store types into a set for de-duplication.
   for (const Slice &S : P) {
@@ -2325,31 +2353,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
           HaveCommonVecPtrTy, CommonVecPtrTy))
     return VTy;
 
-  // Consider additional vector types where the element type size is a
-  // multiple of load/store element size.
-  for (Type *Ty : LoadStoreTys) {
-    if (!VectorType::isValidElementType(Ty))
-      continue;
-    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
-    // Make a copy of CandidateTys and iterate through it, because we might
-    // append to CandidateTys in the loop.
-    SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
-    CandidateTys.clear();
-    for (VectorType *&VTy : CandidateTysCopy) {
-      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
-      unsigned ElementSize =
-          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
-      if (TypeSize != VectorSize && TypeSize != ElementSize &&
-          VectorSize % TypeSize == 0) {
-        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
-        CheckCandidateType(NewVTy);
-      }
-    }
-  }
-
-  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
-                                      CommonEltTy, HaveVecPtrTy,
-                                      HaveCommonVecPtrTy, CommonVecPtrTy);
+  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+  CandidateTys.clear();
+  return createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy);
 }
 
 /// Test whether a slice of an alloca is valid for integer widening.

>From 31d512678a7993bc755f4ebb8a0f2346f86b3b09 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 19 Feb 2024 13:28:32 -0800
Subject: [PATCH 2/7] [SROA]: Only defer trying partial sized ptr or ptr vector
 types

Change-Id: Ic77f87290905addadd5819dff2d0c62f031022ab
---
 llvm/lib/Transforms/Scalar/SROA.cpp           | 19 ++++++++---
 llvm/test/Transforms/SROA/vector-promotion.ll | 32 +++++++++++++++++++
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 7e19efe7f4aaec..a6c44e2ebcf7c4 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2271,6 +2271,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;
   SetVector<Type *> LoadStoreTys;
+  SetVector<Type *> DeferredTys;
   Type *CommonEltTy = nullptr;
   VectorType *CommonVecPtrTy = nullptr;
   bool HaveVecPtrTy = false;
@@ -2342,20 +2343,28 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       Ty = SI->getValueOperand()->getType();
     else
       continue;
+
+    auto CandTy =
+        isa<VectorType>(Ty) ? cast<VectorType>(Ty)->getElementType() : Ty;
+    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
+                                  S.endOffset() != P.endOffset())) {
+      DeferredTys.insert(Ty);
+      continue;
+    }
+
     LoadStoreTys.insert(Ty);
     // Consider any loads or stores that are the exact size of the slice.
     if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
       CheckCandidateType(Ty);
   }
 
-  if (auto *VTy = checkVectorTypesForPromotion(
-          P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
-          HaveCommonVecPtrTy, CommonVecPtrTy))
+  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+  if (auto *VTy =
+          createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy))
     return VTy;
 
-  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
   CandidateTys.clear();
-  return createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy);
+  return createAndCheckVectorTypesForPromotion(DeferredTys, CandidateTysCopy);
 }
 
 /// Test whether a slice of an alloca is valid for integer widening.
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index e48dd5bb392082..dc70520fc5ca70 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -1392,6 +1392,38 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
   ret <4 x ptr> %sroaval
 }
 
+define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
+; CHECK-LABEL: @validLoadStoreTy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
+; CHECK-NEXT:    [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
+; CHECK-NEXT:    [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
+; CHECK-NEXT:    [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+;
+; DEBUG-LABEL: @validLoadStoreTy(
+; DEBUG-NEXT:  entry:
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
+; DEBUG-NEXT:    [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
+; DEBUG-NEXT:    [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
+; DEBUG-NEXT:    [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
+; DEBUG-NEXT:    [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
+; DEBUG-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
+; DEBUG-NEXT:    call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
+; DEBUG-NEXT:    ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
+;
+entry:
+  %cond = alloca <4 x i32>, align 8
+  %coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+  store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
+  %m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+  %0 = load <4 x i32>, ptr %m5, align 8
+  ret <4 x i32> %0
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 declare void @llvm.lifetime.end.p0(i64, ptr)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

>From 1d95dd8bde374440bea9409e692f3168e4383069 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 27 Feb 2024 11:27:22 -0800
Subject: [PATCH 3/7] fixup! convert createAndCheckVectorTypesForPromotion to a
 helper function

Change-Id: I7f63cfe0fdb9f08fd94e40c33c060af492bdd26a
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 71 ++++++++++++++++-------------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a6c44e2ebcf7c4..c8e46e8c895487 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2257,6 +2257,38 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
   return nullptr;
 }
 
+static VectorType *createAndCheckVectorTypesForPromotion(
+    SetVector<Type *> &OtherTys,
+    SmallVectorImpl<VectorType *> &CandidateTysCopy,
+    function_ref<void(Type *)> CheckCandidateType, Partition &P,
+    const DataLayout &DL, SmallVector<VectorType *, 4> &CandidateTys,
+    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
+    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
+  // Consider additional vector types where the element type size is a
+  // multiple of load/store element size.
+  for (Type *Ty : OtherTys) {
+    if (!VectorType::isValidElementType(Ty))
+      continue;
+    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+    // Make a copy of CandidateTys and iterate through it, because we
+    // might append to CandidateTys in the loop.
+    for (VectorType *&VTy : CandidateTysCopy) {
+      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+      unsigned ElementSize =
+          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+      if (TypeSize != VectorSize && TypeSize != ElementSize &&
+          VectorSize % TypeSize == 0) {
+        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
+        CheckCandidateType(NewVTy);
+      }
+    }
+  }
+
+  return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
+                                      CommonEltTy, HaveVecPtrTy,
+                                      HaveCommonVecPtrTy, CommonVecPtrTy);
+}
+
 /// Test whether the given alloca partitioning and range of slices can be
 /// promoted to a vector.
 ///
@@ -2305,34 +2337,6 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       }
     }
   };
-  auto createAndCheckVectorTypesForPromotion =
-      [&](SetVector<Type *> OtherTys,
-          SmallVector<VectorType *, 4> CandidateTysCopy) {
-        // Consider additional vector types where the element type size is a
-        // multiple of load/store element size.
-        for (Type *Ty : OtherTys) {
-          if (!VectorType::isValidElementType(Ty))
-            continue;
-          unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
-          // Make a copy of CandidateTys and iterate through it, because we
-          // might append to CandidateTys in the loop.
-          for (VectorType *&VTy : CandidateTysCopy) {
-            unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
-            unsigned ElementSize =
-                DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
-            if (TypeSize != VectorSize && TypeSize != ElementSize &&
-                VectorSize % TypeSize == 0) {
-              VectorType *NewVTy =
-                  VectorType::get(Ty, VectorSize / TypeSize, false);
-              CheckCandidateType(NewVTy);
-            }
-          }
-        }
-
-        return checkVectorTypesForPromotion(
-            P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
-            HaveCommonVecPtrTy, CommonVecPtrTy);
-      };
 
   // Put load and store types into a set for de-duplication.
   for (const Slice &S : P) {
@@ -2359,12 +2363,17 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
   }
 
   SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
-  if (auto *VTy =
-          createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy))
+  if (auto *VTy = createAndCheckVectorTypesForPromotion(
+          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
+          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+          HaveCommonVecPtrTy, CommonVecPtrTy))
     return VTy;
 
   CandidateTys.clear();
-  return createAndCheckVectorTypesForPromotion(DeferredTys, CandidateTysCopy);
+  return createAndCheckVectorTypesForPromotion(
+      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
+      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
+      CommonVecPtrTy);
 }
 
 /// Test whether a slice of an alloca is valid for integer widening.

>From 08bbc80439f1c3c2eb47582b34d632e4a05cee03 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 28 Feb 2024 15:32:01 -0800
Subject: [PATCH 4/7] fixup! Clean up CandidateTysCopy

Change-Id: Iea1367c878118b6f1dcac3c43b53b98be0ca57e3
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index c8e46e8c895487..cf92ca6c71b310 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2258,10 +2258,9 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
 }
 
 static VectorType *createAndCheckVectorTypesForPromotion(
-    SetVector<Type *> &OtherTys,
-    SmallVectorImpl<VectorType *> &CandidateTysCopy,
+    SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
     function_ref<void(Type *)> CheckCandidateType, Partition &P,
-    const DataLayout &DL, SmallVector<VectorType *, 4> &CandidateTys,
+    const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
     bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
     bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
   // Consider additional vector types where the element type size is a
@@ -2272,7 +2271,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
     unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
     // Make a copy of CandidateTys and iterate through it, because we
     // might append to CandidateTys in the loop.
-    for (VectorType *&VTy : CandidateTysCopy) {
+    for (VectorType *const &VTy : CandidateTysCopy) {
       unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
       unsigned ElementSize =
           DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
@@ -2362,13 +2361,13 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       CheckCandidateType(Ty);
   }
 
-  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
   if (auto *VTy = createAndCheckVectorTypesForPromotion(
-          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
-          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
-          HaveCommonVecPtrTy, CommonVecPtrTy))
+          LoadStoreTys, CandidateTys, CheckCandidateType, P, DL, CandidateTys,
+          HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
+          CommonVecPtrTy))
     return VTy;
 
+  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
   CandidateTys.clear();
   return createAndCheckVectorTypesForPromotion(
       DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,

>From 80eede3b4bd87e4c8ec311702e87820a759fd36d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 29 Feb 2024 15:37:31 -0800
Subject: [PATCH 5/7] fixup! use API & minor code changes

Change-Id: Ib00edfe8bd3517effb7f58bf9631bd2700dc84c1
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index cf92ca6c71b310..9b3868d191e977 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2271,7 +2271,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
     unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
     // Make a copy of CandidateTys and iterate through it, because we
     // might append to CandidateTys in the loop.
-    for (VectorType *const &VTy : CandidateTysCopy) {
+    for (VectorType *const VTy : CandidateTysCopy) {
       unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
       unsigned ElementSize =
           DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
@@ -2347,8 +2347,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
     else
       continue;
 
-    auto CandTy =
-        isa<VectorType>(Ty) ? cast<VectorType>(Ty)->getElementType() : Ty;
+    auto CandTy = Ty->getScalarType();
     if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                   S.endOffset() != P.endOffset())) {
       DeferredTys.insert(Ty);

>From 2d3b23938a4babcfc572b4af0ba6ed3de908b053 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 1 Mar 2024 12:56:12 -0800
Subject: [PATCH 6/7] fixup! Don't use ArrayRef of CandidateTys

Change-Id: I8f00f67fcd9db825b74ed98b260b8c720e17f653
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 9b3868d191e977..e96702d7f657d7 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2360,13 +2360,13 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       CheckCandidateType(Ty);
   }
 
+  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
   if (auto *VTy = createAndCheckVectorTypesForPromotion(
-          LoadStoreTys, CandidateTys, CheckCandidateType, P, DL, CandidateTys,
-          HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
-          CommonVecPtrTy))
+          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
+          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+          HaveCommonVecPtrTy, CommonVecPtrTy))
     return VTy;
 
-  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
   CandidateTys.clear();
   return createAndCheckVectorTypesForPromotion(
       DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,

>From ea54c6fc3670ac9f165f33e8699d95677ee54cdb Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 4 Mar 2024 10:28:44 -0800
Subject: [PATCH 7/7] fixup! Add test for invariant memory across loop

Change-Id: Idff43d522274474a6b719548eaefcf675fdb66e2
---
 llvm/lib/Transforms/Scalar/SROA.cpp           |  4 +++
 llvm/test/Transforms/SROA/vector-promotion.ll | 30 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index e96702d7f657d7..4b5b0ea66fa063 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2263,6 +2263,8 @@ static VectorType *createAndCheckVectorTypesForPromotion(
     const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
     bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
     bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
+  [[maybe_unused]] auto OriginalElt =
+      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
   // Consider additional vector types where the element type size is a
   // multiple of load/store element size.
   for (Type *Ty : OtherTys) {
@@ -2272,6 +2274,8 @@ static VectorType *createAndCheckVectorTypesForPromotion(
     // Make a copy of CandidateTys and iterate through it, because we
     // might append to CandidateTys in the loop.
     for (VectorType *const VTy : CandidateTysCopy) {
+      // The elements in the copy should remain invariant throughout the loop.
+      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
       unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
       unsigned ElementSize =
           DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index dc70520fc5ca70..df5fa01f454b85 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -1424,6 +1424,36 @@ entry:
   ret <4 x i32> %0
 }
 
+; The following test should not crash the compiler.
+; (Calls to CheckCandidateType from createAndCheckVectorTypesForPromotion may append to CandidateTys and reallocate the memory behind CandidateTys.data().)
+define noundef zeroext i1 @CandidateTysRealloc() personality ptr null {
+entry:
+  %alloca = alloca <4x i32>, align 16
+  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %alloca, align 16
+  br label %bb.1
+
+bb.1:
+  br label %bb.1
+
+bb.2:
+  %Load0 = load <4 x i32>, ptr %alloca, align 16
+  store <4 x i32> zeroinitializer, ptr %alloca, align 16
+  %Load1 = load <4 x i32>, ptr %alloca, align 16
+  br label %bb.3
+
+bb.3:
+  br label %bb.3
+
+bb.4:
+  %Load2 = load i64, ptr %alloca, align 16
+  %Load3 = load <4 x i32>, ptr %alloca, align 16
+  store <4 x i32> zeroinitializer, ptr %alloca, align 16
+  br label %bb.5
+
+bb.5:
+  br label %bb.5
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
 declare void @llvm.lifetime.end.p0(i64, ptr)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:



More information about the llvm-commits mailing list