[PATCH] D124616: [TTI][X86] Fix splat-load cost when load+broadcast cannot be combined.

Vasileios Porpodas via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 28 10:44:56 PDT 2022


vporpo updated this revision to Diff 425847.
vporpo added a comment.

We now only check if the load and its uses are in the same BB.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D124616/new/

https://reviews.llvm.org/D124616

Files:
  llvm/lib/Target/X86/X86TargetTransformInfo.cpp
  llvm/test/Analysis/CostModel/X86/shuffle-load.ll


Index: llvm/test/Analysis/CostModel/X86/shuffle-load.ll
===================================================================
--- llvm/test/Analysis/CostModel/X86/shuffle-load.ll
+++ llvm/test/Analysis/CostModel/X86/shuffle-load.ll
@@ -471,3 +471,51 @@
 
   ret void
 }
+
+; Checks the cost of a load+broadcast that cannot be combined (load and splat shuffle are in different BBs).
+define void @different_bbs() {
+; SSE-LABEL: 'different_bbs'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE2-LABEL: 'different_bbs'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE3-LABEL: 'different_bbs'
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; SSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX-LABEL: 'different_bbs'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX2-LABEL: 'different_bbs'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; AVX512-LABEL: 'different_bbs'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %bb2
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+bb1:
+  %ld_2xf64 = load <2 x double>, ptr undef
+  br label %bb2
+
+bb2:
+  ; The splat shuffle below uses the load from bb1, so load and broadcast are in different BBs.
+  %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer
+  ret void
+}
Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1602,16 +1602,23 @@
   };
 
   if (ST->hasSSE2()) {
-    bool IsLoad =
-        llvm::any_of(Args, [](const auto &V) { return isa<LoadInst>(V); });
-    if (ST->hasSSE3() && IsLoad)
-      if (const auto *Entry =
-              CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
-        assert(isLegalBroadcastLoad(BaseTp->getElementType(),
-                                    LT.second.getVectorElementCount()) &&
-               "Table entry missing from isLegalBroadcastLoad()");
-        return LT.first * Entry->Cost;
-      }
+    if (!Args.empty() && isa<LoadInst>(Args[0])) {
+      // A load folds into a broadcast only if all of its users are in its BB.
+      const LoadInst *L = cast<LoadInst>(Args[0]);
+      const BasicBlock *LBB = L->getParent();
+      bool LoadCanBeCombined = llvm::all_of(L->users(), [LBB](const User *U) {
+        const auto *UI = dyn_cast<Instruction>(U);
+        return UI && UI->getParent() == LBB; // Non-instruction user: no fold.
+      });
+      if (ST->hasSSE3() && LoadCanBeCombined)
+        if (const auto *Entry =
+                CostTableLookup(SSE3BroadcastLoadTbl, Kind, LT.second)) {
+          assert(isLegalBroadcastLoad(BaseTp->getElementType(),
+                                      LT.second.getVectorElementCount()) &&
+                 "Table entry missing from isLegalBroadcastLoad()");
+          return LT.first * Entry->Cost;
+        }
+    }
 
     if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124616.425847.patch
Type: text/x-patch
Size: 5325 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220428/b7154c3b/attachment.bin>


More information about the llvm-commits mailing list