[PATCH] D15119: AMDGPU: Report extractelement as free in cost model

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 1 09:12:11 PST 2015


arsenm created this revision.
arsenm added a reviewer: tstellarAMD.
arsenm added a subscriber: llvm-commits.

The cost for scalarized operations is computed as N * (scalar operation
cost + 1 extractelement + 1 insertelement). This partially fixes
inflating the cost of scalarized operations since every operation is
scalarized and free. I don't think we want any cost associated with
scalarization, but for now insertelement is still counted. I'm not sure
if we should pretend that insertelement is also free, or add a way
to compute a custom scalarization cost.

http://reviews.llvm.org/D15119

Files:
  include/llvm/CodeGen/BasicTTIImpl.h
  test/Analysis/CostModel/AMDGPU/fabs.ll

Index: test/Analysis/CostModel/AMDGPU/fabs.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AMDGPU/fabs.ll
@@ -0,0 +1,67 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+
+; CHECK: 'fabs_f32'
+; CHECK: estimated cost of 0 for {{.*}} call float @llvm.fabs.f32
+define void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 {
+  %vec = load float, float addrspace(1)* %vaddr
+  %fabs = call float @llvm.fabs.f32(float %vec) #1
+  store float %fabs, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK: 'fabs_v2f32'
+; CHECK: estimated cost of 2 for {{.*}} call <2 x float> @llvm.fabs.v2f32
+define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 {
+  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %vec) #1
+  store <2 x float> %fabs, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; CHECK: 'fabs_v3f32'
+; CHECK: estimated cost of 3 for {{.*}} call <3 x float> @llvm.fabs.v3f32
+define void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 {
+  %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
+  %fabs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %vec) #1
+  store <3 x float> %fabs, <3 x float> addrspace(1)* %out
+  ret void
+}
+
+; CHECK: 'fabs_f64'
+; CHECK: estimated cost of 0 for {{.*}} call double @llvm.fabs.f64
+define void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 {
+  %vec = load double, double addrspace(1)* %vaddr
+  %fabs = call double @llvm.fabs.f64(double %vec) #1
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK: 'fabs_v2f64'
+; CHECK: estimated cost of 2 for {{.*}} call <2 x double> @llvm.fabs.v2f64
+define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 {
+  %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
+  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %vec) #1
+  store <2 x double> %fabs, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; CHECK: 'fabs_v3f64'
+; CHECK: estimated cost of 3 for {{.*}} call <3 x double> @llvm.fabs.v3f64
+define void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 {
+  %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
+  %fabs = call <3 x double> @llvm.fabs.v3f64(<3 x double> %vec) #1
+  store <3 x double> %fabs, <3 x double> addrspace(1)* %out
+  ret void
+}
+
+
+declare float @llvm.fabs.f32(float) #1
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #1
+declare <3 x float> @llvm.fabs.v3f32(<3 x float>) #1
+
+declare double @llvm.fabs.f64(double) #1
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #1
+declare <3 x double> @llvm.fabs.v3f64(<3 x double>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -697,6 +697,11 @@
     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
 
     if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
+      if (IID == Intrinsic::fabs &&
+          TLI->isFAbsFree(LT.second)) {
+        return 0;
+      }
+
       // The operation is legal. Assume it costs 1.
       // If the type is split to multiple registers, assume that there is some
       // overhead to this.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D15119.41525.patch
Type: text/x-patch
Size: 3554 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20151201/ddcfdc7d/attachment.bin>


More information about the llvm-commits mailing list