[llvm] llvm.lround: Added the intrinsic cost model case with a test. (PR #105694)

Thu Aug 22 09:57:23 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Sumanth Gundapaneni (sgundapa)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/105694.diff


2 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+6) 
- (added) llvm/test/Analysis/CostModel/AMDGPU/lround.ll (+78) 


``````````diff

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 217e3f1324f9c9..14968698058d44 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2068,6 +2068,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::roundeven:
       ISD = ISD::FROUNDEVEN;
       break;
+    case Intrinsic::lround:
+      ISD = ISD::LROUND;
+      break;
+    case Intrinsic::llround:
+      ISD = ISD::LLROUND;
+      break;
     case Intrinsic::pow:
       ISD = ISD::FPOW;
       break;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/lround.ll b/llvm/test/Analysis/CostModel/AMDGPU/lround.ll
new file mode 100644
index 00000000000000..8228d1672e2ebb
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/lround.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SIZE %s
+
+; Cost-model queries for llvm.lround on scalar (f16/f32/f64) and vector (<2 x float>, <4 x float>) operands.
+define i32 @test_lround() {
+; ALL-LABEL: 'test_lround'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %VI32F32 = call i32 @llvm.lround.i32.f32(float undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F64 = call i32 @llvm.lround.i32.f64(double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F32 = call i64 @llvm.lround.i64.f32(float undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F64 = call i64 @llvm.lround.i64.f64(double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F16 = call i32 @llvm.lround.i32.f16(half undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I322F32 = call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I642F32 = call <2 x i64> @llvm.lround.v2i64.v2f32(<2 x float> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I324F32 = call <4 x i32> @llvm.lround.v4i32.v4f32(<4 x float> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'test_lround'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %VI32F32 = call i32 @llvm.lround.i32.f32(float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F64 = call i32 @llvm.lround.i32.f64(double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F32 = call i64 @llvm.lround.i64.f32(float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F64 = call i64 @llvm.lround.i64.f64(double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F16 = call i32 @llvm.lround.i32.f16(half undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I322F32 = call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I642F32 = call <2 x i64> @llvm.lround.v2i64.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I324F32 = call <4 x i32> @llvm.lround.v4i32.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %VI32F32 =  call i32 @llvm.lround.i32.f32(float undef)
+  %V32F64 =  call i32 @llvm.lround.i32.f64(double undef)
+  %V64F32 =  call i64 @llvm.lround.i64.f32(float undef)
+  %V64F64 =  call i64 @llvm.lround.i64.f64(double undef)
+  %V32F16 =  call i32 @llvm.lround.i32.f16(half undef)
+  %V2I322F32 =  call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> undef)
+  %V2I642F32 =  call <2 x i64> @llvm.lround.v2i64.v2f32(<2 x float> undef)
+  %V4I324F32 =  call <4 x i32> @llvm.lround.v4i32.v4f32(<4 x float> undef)
+  ret i32 undef
+}
+; Cost-model queries for llvm.llround on scalar (f16/f32/f64) and vector (<2 x float>, <4 x float>) operands.
+define i32 @test_llround() {
+; ALL-LABEL: 'test_llround'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %VI32F32 = call i32 @llvm.llround.i32.f32(float undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F64 = call i32 @llvm.llround.i32.f64(double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F32 = call i64 @llvm.llround.i64.f32(float undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F64 = call i64 @llvm.llround.i64.f64(double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F16 = call i32 @llvm.llround.i32.f16(half undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I322F32 = call <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I642F32 = call <2 x i64> @llvm.llround.v2i64.v2f32(<2 x float> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I324F32 = call <4 x i32> @llvm.llround.v4i32.v4f32(<4 x float> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; SIZE-LABEL: 'test_llround'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %VI32F32 = call i32 @llvm.llround.i32.f32(float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F64 = call i32 @llvm.llround.i32.f64(double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F32 = call i64 @llvm.llround.i64.f32(float undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64F64 = call i64 @llvm.llround.i64.f64(double undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32F16 = call i32 @llvm.llround.i32.f16(half undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I322F32 = call <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I642F32 = call <2 x i64> @llvm.llround.v2i64.v2f32(<2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I324F32 = call <4 x i32> @llvm.llround.v4i32.v4f32(<4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+  %VI32F32 =  call i32 @llvm.llround.i32.f32(float undef)
+  %V32F64 =  call i32 @llvm.llround.i32.f64(double undef)
+  %V64F32 =  call i64 @llvm.llround.i64.f32(float undef)
+  %V64F64 =  call i64 @llvm.llround.i64.f64(double undef)
+  %V32F16 =  call i32 @llvm.llround.i32.f16(half undef)
+  %V2I322F32 =  call <2 x i32> @llvm.llround.v2i32.v2f32(<2 x float> undef)
+  %V2I642F32 =  call <2 x i64> @llvm.llround.v2i64.v2f32(<2 x float> undef)
+  %V4I324F32 =  call <4 x i32> @llvm.llround.v4i32.v4f32(<4 x float> undef)
+  ret i32 undef
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/105694