[PATCH] D155876: [PowerPC] vector cost model add cost to extract i1

Roland Froese via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 20 11:53:45 PDT 2023


RolandF created this revision.
Herald added subscribers: shchenz, kbarton, hiraditya, nemanjai.
Herald added a project: All.
RolandF requested review of this revision.
Herald added subscribers: llvm-commits, wangpc.
Herald added a project: LLVM.

Try to avoid some unprofitable predication on PPC.  Recognize in the cost model that computing on i1 values will require extra mask or compare operation.


https://reviews.llvm.org/D155876

Files:
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/Transforms/LoopVectorize/PowerPC/predcost.ll


Index: llvm/test/Transforms/LoopVectorize/PowerPC/predcost.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/PowerPC/predcost.ll
@@ -0,0 +1,45 @@
+; RUN: opt -ppc-vec-mask-cost=true -aa-pipeline=basic-aa -mcpu=pwr8 -S -passes=loop-vectorize < %s | FileCheck %s
+
+target datalayout = "e-m:e-Fn32-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define dso_local void @_tc(ptr nocapture noundef %aaa, i64 noundef %bbb) local_unnamed_addr #0 {
+; CHECK-NOT: extractelement <16 x i1>
+entry:
+  %cmp7 = icmp sgt i64 %bbb, 0
+  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.inc
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  %arrayidx3 = getelementptr inbounds i8, ptr %aaa, i64 %bbb
+  store i8 0, ptr %arrayidx3, align 1
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %i.08 = phi i64 [ %inc, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i8, ptr %aaa, i64 %i.08
+  %0 = load i8, ptr %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  store i8 32, ptr %arrayidx, align 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %inc = add nuw nsw i64 %i.08, 1
+  %exitcond.not = icmp eq i64 %inc, %bbb
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -27,6 +27,9 @@
 
 #define DEBUG_TYPE "ppctti"
 
+static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
+cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
+
 static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
 cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
 
@@ -700,6 +703,9 @@
     return Cost;
 
   } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+    unsigned EltSize = Val->getScalarSizeInBits();
+    // Computing on 1 bit values requires extra mask or compare operations.
+    unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
     if (ST->hasP9Altivec()) {
       if (ISD == ISD::INSERT_VECTOR_ELT)
         // A move-to VSR and a permute/insert.  Assume vector operation cost
@@ -721,12 +727,14 @@
       // We need a vector extract (or mfvsrld).  Assume vector operation cost.
       // The cost of the load constant for a vector extract is disregarded
       // (invariant, easily schedulable).
-      return CostFactor;
+      return CostFactor + MaskCost;
 
     } else if (ST->hasDirectMove())
       // Assume permute has standard cost.
       // Assume move-to/move-from VSR have 2x standard cost.
-      return 3;
+      if (ISD == ISD::INSERT_VECTOR_ELT)
+        return 3;
+      return 3 + MaskCost;
   }
 
   // Estimated cost of a load-hit-store delay.  This was obtained


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155876.542620.patch
Type: text/x-patch
Size: 3846 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230720/5c0a6713/attachment.bin>


More information about the llvm-commits mailing list