[llvm] [SROA]: Only defer trying partial sized ptr or ptr vector types (PR #82279)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 15:43:22 PST 2024
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/82279
>From 5ea353115b99d7aa7015d8bca504aa0fd9604cc9 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 19 Feb 2024 12:59:13 -0800
Subject: [PATCH 1/5] [SROA] NFC: Extract common code to createAndCheckVectorTypesForPromotion
Change-Id: Iea5d60b12e2de7033fc1a71e80aa96c261e998bf
---
llvm/lib/Transforms/Scalar/SROA.cpp | 56 ++++++++++++++++-------------
1 file changed, 31 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 6c8785d52c4eab..7e19efe7f4aaec 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2304,6 +2304,34 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
}
}
};
+ auto createAndCheckVectorTypesForPromotion =
+ [&](SetVector<Type *> OtherTys,
+ SmallVector<VectorType *, 4> CandidateTysCopy) {
+ // Consider additional vector types where the element type size is a
+ // multiple of load/store element size.
+ for (Type *Ty : OtherTys) {
+ if (!VectorType::isValidElementType(Ty))
+ continue;
+ unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+ // Make a copy of CandidateTys and iterate through it, because we
+ // might append to CandidateTys in the loop.
+ for (VectorType *&VTy : CandidateTysCopy) {
+ unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+ unsigned ElementSize =
+ DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+ if (TypeSize != VectorSize && TypeSize != ElementSize &&
+ VectorSize % TypeSize == 0) {
+ VectorType *NewVTy =
+ VectorType::get(Ty, VectorSize / TypeSize, false);
+ CheckCandidateType(NewVTy);
+ }
+ }
+ }
+
+ return checkVectorTypesForPromotion(
+ P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+ HaveCommonVecPtrTy, CommonVecPtrTy);
+ };
// Put load and store types into a set for de-duplication.
for (const Slice &S : P) {
@@ -2325,31 +2353,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
HaveCommonVecPtrTy, CommonVecPtrTy))
return VTy;
- // Consider additional vector types where the element type size is a
- // multiple of load/store element size.
- for (Type *Ty : LoadStoreTys) {
- if (!VectorType::isValidElementType(Ty))
- continue;
- unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
- // Make a copy of CandidateTys and iterate through it, because we might
- // append to CandidateTys in the loop.
- SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
- CandidateTys.clear();
- for (VectorType *&VTy : CandidateTysCopy) {
- unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
- unsigned ElementSize =
- DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
- if (TypeSize != VectorSize && TypeSize != ElementSize &&
- VectorSize % TypeSize == 0) {
- VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
- CheckCandidateType(NewVTy);
- }
- }
- }
-
- return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
- CommonEltTy, HaveVecPtrTy,
- HaveCommonVecPtrTy, CommonVecPtrTy);
+ SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+ CandidateTys.clear();
+ return createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy);
}
/// Test whether a slice of an alloca is valid for integer widening.
>From 31d512678a7993bc755f4ebb8a0f2346f86b3b09 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 19 Feb 2024 13:28:32 -0800
Subject: [PATCH 2/5] [SROA]: Only defer trying partial sized ptr or ptr vector types
Change-Id: Ic77f87290905addadd5819dff2d0c62f031022ab
---
llvm/lib/Transforms/Scalar/SROA.cpp | 19 ++++++++---
llvm/test/Transforms/SROA/vector-promotion.ll | 32 +++++++++++++++++++
2 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 7e19efe7f4aaec..a6c44e2ebcf7c4 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2271,6 +2271,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
SetVector<Type *> LoadStoreTys;
+ SetVector<Type *> DeferredTys;
Type *CommonEltTy = nullptr;
VectorType *CommonVecPtrTy = nullptr;
bool HaveVecPtrTy = false;
@@ -2342,20 +2343,28 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
Ty = SI->getValueOperand()->getType();
else
continue;
+
+ auto CandTy =
+ isa<VectorType>(Ty) ? cast<VectorType>(Ty)->getElementType() : Ty;
+ if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
+ S.endOffset() != P.endOffset())) {
+ DeferredTys.insert(Ty);
+ continue;
+ }
+
LoadStoreTys.insert(Ty);
// Consider any loads or stores that are the exact size of the slice.
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
CheckCandidateType(Ty);
}
- if (auto *VTy = checkVectorTypesForPromotion(
- P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
- HaveCommonVecPtrTy, CommonVecPtrTy))
+ SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+ if (auto *VTy =
+ createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy))
return VTy;
- SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
CandidateTys.clear();
- return createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy);
+ return createAndCheckVectorTypesForPromotion(DeferredTys, CandidateTysCopy);
}
/// Test whether a slice of an alloca is valid for integer widening.
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index e48dd5bb392082..dc70520fc5ca70 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -1392,6 +1392,38 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
ret <4 x ptr> %sroaval
}
+define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
+; CHECK-LABEL: @validLoadStoreTy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
+; CHECK-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
+; CHECK-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
+; CHECK-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP0]]
+;
+; DEBUG-LABEL: @validLoadStoreTy(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
+; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
+; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
+; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
+; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
+; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
+; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
+;
+entry:
+ %cond = alloca <4 x i32>, align 8
+ %coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+ store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
+ %m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+ %0 = load <4 x i32>, ptr %m5, align 8
+ ret <4 x i32> %0
+}
+
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare void @llvm.lifetime.end.p0(i64, ptr)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
>From 1d95dd8bde374440bea9409e692f3168e4383069 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 27 Feb 2024 11:27:22 -0800
Subject: [PATCH 3/5] fixup! convert createAndCheckVectorTypesForPromotion to a helper function
Change-Id: I7f63cfe0fdb9f08fd94e40c33c060af492bdd26a
---
llvm/lib/Transforms/Scalar/SROA.cpp | 71 ++++++++++++++++-------------
1 file changed, 40 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a6c44e2ebcf7c4..c8e46e8c895487 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2257,6 +2257,38 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
return nullptr;
}
+static VectorType *createAndCheckVectorTypesForPromotion(
+ SetVector<Type *> &OtherTys,
+ SmallVectorImpl<VectorType *> &CandidateTysCopy,
+ function_ref<void(Type *)> CheckCandidateType, Partition &P,
+ const DataLayout &DL, SmallVector<VectorType *, 4> &CandidateTys,
+ bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
+ bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
+ // Consider additional vector types where the element type size is a
+ // multiple of load/store element size.
+ for (Type *Ty : OtherTys) {
+ if (!VectorType::isValidElementType(Ty))
+ continue;
+ unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+ // Make a copy of CandidateTys and iterate through it, because we
+ // might append to CandidateTys in the loop.
+ for (VectorType *&VTy : CandidateTysCopy) {
+ unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+ unsigned ElementSize =
+ DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+ if (TypeSize != VectorSize && TypeSize != ElementSize &&
+ VectorSize % TypeSize == 0) {
+ VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
+ CheckCandidateType(NewVTy);
+ }
+ }
+ }
+
+ return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
+ CommonEltTy, HaveVecPtrTy,
+ HaveCommonVecPtrTy, CommonVecPtrTy);
+}
+
/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
@@ -2305,34 +2337,6 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
}
}
};
- auto createAndCheckVectorTypesForPromotion =
- [&](SetVector<Type *> OtherTys,
- SmallVector<VectorType *, 4> CandidateTysCopy) {
- // Consider additional vector types where the element type size is a
- // multiple of load/store element size.
- for (Type *Ty : OtherTys) {
- if (!VectorType::isValidElementType(Ty))
- continue;
- unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
- // Make a copy of CandidateTys and iterate through it, because we
- // might append to CandidateTys in the loop.
- for (VectorType *&VTy : CandidateTysCopy) {
- unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
- unsigned ElementSize =
- DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
- if (TypeSize != VectorSize && TypeSize != ElementSize &&
- VectorSize % TypeSize == 0) {
- VectorType *NewVTy =
- VectorType::get(Ty, VectorSize / TypeSize, false);
- CheckCandidateType(NewVTy);
- }
- }
- }
-
- return checkVectorTypesForPromotion(
- P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
- HaveCommonVecPtrTy, CommonVecPtrTy);
- };
// Put load and store types into a set for de-duplication.
for (const Slice &S : P) {
@@ -2359,12 +2363,17 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
}
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
- if (auto *VTy =
- createAndCheckVectorTypesForPromotion(LoadStoreTys, CandidateTysCopy))
+ if (auto *VTy = createAndCheckVectorTypesForPromotion(
+ LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
+ CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+ HaveCommonVecPtrTy, CommonVecPtrTy))
return VTy;
CandidateTys.clear();
- return createAndCheckVectorTypesForPromotion(DeferredTys, CandidateTysCopy);
+ return createAndCheckVectorTypesForPromotion(
+ DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
+ HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
+ CommonVecPtrTy);
}
/// Test whether a slice of an alloca is valid for integer widening.
>From 08bbc80439f1c3c2eb47582b34d632e4a05cee03 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 28 Feb 2024 15:32:01 -0800
Subject: [PATCH 4/5] fixup! Clean up CandidateTysCopy
Change-Id: Iea1367c878118b6f1dcac3c43b53b98be0ca57e3
---
llvm/lib/Transforms/Scalar/SROA.cpp | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index c8e46e8c895487..cf92ca6c71b310 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2258,10 +2258,9 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
}
static VectorType *createAndCheckVectorTypesForPromotion(
- SetVector<Type *> &OtherTys,
- SmallVectorImpl<VectorType *> &CandidateTysCopy,
+ SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
function_ref<void(Type *)> CheckCandidateType, Partition &P,
- const DataLayout &DL, SmallVector<VectorType *, 4> &CandidateTys,
+ const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
// Consider additional vector types where the element type size is a
@@ -2272,7 +2271,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
// Make a copy of CandidateTys and iterate through it, because we
// might append to CandidateTys in the loop.
- for (VectorType *&VTy : CandidateTysCopy) {
+ for (VectorType *const &VTy : CandidateTysCopy) {
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
unsigned ElementSize =
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
@@ -2362,13 +2361,13 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
CheckCandidateType(Ty);
}
- SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
if (auto *VTy = createAndCheckVectorTypesForPromotion(
- LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
- CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
- HaveCommonVecPtrTy, CommonVecPtrTy))
+ LoadStoreTys, CandidateTys, CheckCandidateType, P, DL, CandidateTys,
+ HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
+ CommonVecPtrTy))
return VTy;
+ SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
CandidateTys.clear();
return createAndCheckVectorTypesForPromotion(
DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
>From 80eede3b4bd87e4c8ec311702e87820a759fd36d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 29 Feb 2024 15:37:31 -0800
Subject: [PATCH 5/5] fixup! use API & minor code changes
Change-Id: Ib00edfe8bd3517effb7f58bf9631bd2700dc84c1
---
llvm/lib/Transforms/Scalar/SROA.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index cf92ca6c71b310..9b3868d191e977 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2271,7 +2271,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
// Make a copy of CandidateTys and iterate through it, because we
// might append to CandidateTys in the loop.
- for (VectorType *const &VTy : CandidateTysCopy) {
+ for (VectorType *const VTy : CandidateTysCopy) {
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
unsigned ElementSize =
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
@@ -2347,8 +2347,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
else
continue;
- auto CandTy =
- isa<VectorType>(Ty) ? cast<VectorType>(Ty)->getElementType() : Ty;
+ auto CandTy = Ty->getScalarType();
if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
S.endOffset() != P.endOffset())) {
DeferredTys.insert(Ty);
More information about the llvm-commits mailing list