[PATCH] D124616: [TTI][X86] Fix splat-load cost when load+broadcast cannot be combined.

Vasileios Porpodas via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon May 2 08:28:14 PDT 2022


vporpo updated this revision to Diff 426419.
vporpo added a comment.

Added a hasOneUse() check and removed the basic-block (BB) check.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124616/new/

https://reviews.llvm.org/D124616

Files:
  llvm/lib/Target/X86/X86TargetTransformInfo.cpp
  llvm/test/Analysis/CostModel/X86/shuffle-load.ll


Index: llvm/test/Analysis/CostModel/X86/shuffle-load.ll
===================================================================
--- llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -471,3 +471,49 @@
 
   ret void
 }
+
+; Checks the cost of a load+broadcast that cannot be combined because the load has multiple uses.
+define void @multiple_uses() {
+; SSE-LABEL: 'multiple_uses'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'multiple_uses'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'multiple_uses'
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'multiple_uses'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'multiple_uses'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'multiple_uses'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+bb1:
+  %ld_2xf64 = load <2 x double>, ptr undef
+  ; Load has multiple uses
+  %sf_2xf64_1 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  %sf_2xf64_2 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  ret void
+}
Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1608,16 +1608,21 @@
   };
 
   if (ST->hasSSE2()) {
-    bool IsLoad =
-        llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
-    if (ST->hasSSE3() && IsLoad)
-      if (const auto *Entry =
-              CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
-        assert(isLegalBroadcastLoad(BaseTp->getElementType(),
-                                    LT.second.getVectorElementCount()) &&
-               "Table entry missing from isLegalBroadcastLoad()");
-        return LT.first * Entry->Cost;
-      }
+    if (bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0])) {
+      // A Load can be combined with a Broadcast if the Broadcast is the Load's
+      // single user.
+      const LoadInst *L = cast<LoadInst>(Args[0]);
+      bool LoadCanBeCombined =
+          L->hasOneUse() && isa<Instruction>(L->user_back());
+      if (ST->hasSSE3() && LoadCanBeCombined)
+        if (const auto *Entry =
+                CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
+          assert(isLegalBroadcastLoad(BaseTp->getElementType(),
+                                      LT.second.getVectorElementCount()) &&
+                 "Table entry missing from isLegalBroadcastLoad()");
+          return LT.first * Entry->Cost;
+        }
+    }
 
     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124616.426419.patch
Type: text/x-patch
Size: 5785 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220502/e6ae98ce/attachment.bin>


More information about the llvm-commits mailing list