[PATCH] D124616: [TTI][X86] Fix splat-load cost when load+broadcast cannot be combined.
Vasileios Porpodas via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 2 15:28:05 PDT 2022
vporpo updated this revision to Diff 426531.
vporpo added a comment.
Updated condition for LoadCanBeCombined.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D124616/new/
https://reviews.llvm.org/D124616
Files:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/shuffle-load.ll
Index: llvm/test/Analysis/CostModel/X86/shuffle-load.ll
===================================================================
--- llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -471,3 +471,49 @@
ret void
}
+
+; Checks the cost of a load+broadcast that cannot be combined.
+define void @multiple_uses() {
+; SSE-LABEL: 'multiple_uses'
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'multiple_uses'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'multiple_uses'
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'multiple_uses'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'multiple_uses'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'multiple_uses'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+bb1:
+ %ld_2xf64 = load <2 x double>, ptr undef
+ ; Load has multiple uses
+ %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+ %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+ ret void
+}
Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1608,16 +1608,26 @@
};
if (ST->hasSSE2()) {
- bool IsLoad =
- llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
- if (ST->hasSSE3() && IsLoad)
- if (const auto *Entry =
- CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
- assert(isLegalBroadcastLoad(BaseTp->getElementType(),
- LT.second.getVectorElementCount()) &&
- "Table entry missing from isLegalBroadcastLoad()");
- return LT.first * Entry->Cost;
- }
+ if (bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0])) {
+ const LoadInst *L = cast<LoadInst>(Args[0]);
+ // A Load can be combined with a Broadcast if the Broadcast is the Load's
+ // single user.
+ // Note: We are only checking L->hasOneUse() when the load is a vector. We
+ // are doing this because this function is also used by the SLP Vectorizer
+ // to estimate the cost before the vector instructions get generated. So
+ // the load that gets broadcasted gets used by multiple scalar
+ // instructions, and L->hasOneUse() would return false.
+ bool LoadCanBeCombined =
+ L->getType()->isVectorTy() ? L->hasOneUse() : true;
+ if (ST->hasSSE3() && LoadCanBeCombined)
+ if (const auto *Entry =
+ CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
+ assert(isLegalBroadcastLoad(BaseTp->getElementType(),
+ LT.second.getVectorElementCount()) &&
+ "Table entry missing from isLegalBroadcastLoad()");
+ return LT.first * Entry->Cost;
+ }
+ }
if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124616.426531.patch
Type: text/x-patch
Size: 6162 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220502/4b2ed63a/attachment.bin>
More information about the llvm-commits mailing list