[llvm] BasicTTI: Cleanup multiple result intrinsic handling (PR #165970)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 22:37:59 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Avoid the weird lambda returning a function pointer and sink the libcall
logic to where the operation is handled. This allows chaining the
libcall logic to try sincos_stret and fall back to sincos. The resulting
cost seems too low.
---
Full diff: https://github.com/llvm/llvm-project/pull/165970.diff
2 Files Affected:
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (+40-20)
- (modified) llvm/test/Analysis/CostModel/AArch64/sincos.ll (+17-4)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e8dbc964a943e..bbce59b71edae 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -302,7 +302,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// (e.g. scalarization).
std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
- RTLIB::Libcall LC,
std::optional<unsigned> CallRetElementIndex = {}) const {
Type *RetTy = ICA.getReturnType();
// Vector variants of the intrinsic can be mapped to a vector library call.
@@ -311,11 +310,43 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
!isVectorizedStructTy(cast<StructType>(RetTy)))
return std::nullopt;
+ Type *Ty = getContainedTypes(RetTy).front();
+ EVT VT = getTLI()->getValueType(DL, Ty);
+
+ EVT ScalarVT = VT.getScalarType();
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+
+ bool UsesMemoryOutArgument = true;
+
+ switch (ICA.getID()) {
+ case Intrinsic::modf:
+ LC = RTLIB::getMODF(ScalarVT);
+ break;
+ case Intrinsic::sincospi:
+ LC = RTLIB::getSINCOSPI(ScalarVT);
+ break;
+ case Intrinsic::sincos:
+ LC = RTLIB::getSINCOS_STRET(ScalarVT);
+ UsesMemoryOutArgument = false;
+
+ if (getTLI()->getLibcallImpl(LC) == RTLIB::Unsupported) {
+ LC = RTLIB::getSINCOS(ScalarVT);
+ UsesMemoryOutArgument = true;
+ }
+
+ break;
+ default:
+ return std::nullopt;
+ }
+
// Find associated libcall.
- const char *LCName = getTLI()->getLibcallName(LC);
- if (!LCName)
+ RTLIB::LibcallImpl LibcallImpl = getTLI()->getLibcallImpl(LC);
+ if (LibcallImpl == RTLIB::Unsupported)
return std::nullopt;
+ StringRef LCName =
+ RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl);
+
// Search for a corresponding vector variant.
LLVMContext &Ctx = RetTy->getContext();
ElementCount VF = getVectorizedTypeVF(RetTy);
@@ -336,6 +367,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
VecTy, {}, CostKind, 0, nullptr, {});
}
+ // Technically this depends on the ABI, but assume sincos_stret passes in
+ // registers.
+ if (!UsesMemoryOutArgument)
+ return Cost;
+
// Lowering to a library call (with output pointers) may require us to emit
// reloads for the results.
for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
@@ -2137,22 +2173,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::modf:
case Intrinsic::sincos:
case Intrinsic::sincospi: {
- Type *Ty = getContainedTypes(RetTy).front();
- EVT VT = getTLI()->getValueType(DL, Ty);
-
- RTLIB::Libcall LC = [&] {
- switch (ICA.getID()) {
- case Intrinsic::modf:
- return RTLIB::getMODF;
- case Intrinsic::sincos:
- return RTLIB::getSINCOS;
- case Intrinsic::sincospi:
- return RTLIB::getSINCOSPI;
- default:
- llvm_unreachable("unexpected intrinsic");
- }
- }()(VT.getScalarType());
-
std::optional<unsigned> CallRetElementIndex;
// The first element of the modf result is returned by value in the
// libcall.
@@ -2160,7 +2180,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CallRetElementIndex = 0;
if (auto Cost = getMultipleResultIntrinsicVectorLibCallCost(
- ICA, CostKind, LC, CallRetElementIndex))
+ ICA, CostKind, CallRetElementIndex))
return *Cost;
// Otherwise, fallback to default scalarization cost.
break;
diff --git a/llvm/test/Analysis/CostModel/AArch64/sincos.ll b/llvm/test/Analysis/CostModel/AArch64/sincos.ll
index 32408acb582d0..72c8f2bbbf8cf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sincos.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sincos.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos"
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print<cost-model>" -intrinsic-cost-strategy=intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB
+; RUN: opt < %s -mtriple=arm64-apple-macos10.9 -mattr=+neon -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck -check-prefix=SINCOS_STRET %s
define void @sincos() {
; CHECK-LABEL: 'sincos'
@@ -8,13 +9,11 @@ define void @sincos() {
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
-;
; CHECK: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
; CHECK: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
; CHECK: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
-;
; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
@@ -26,18 +25,32 @@ define void @sincos() {
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
-;
; CHECK-VECLIB: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
-;
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
+;
+; SINCOS_STRET-LABEL: 'sincos'
+; SINCOS_STRET: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
;
%f16 = call { half, half } @llvm.sincos.f16(half poison)
%f32 = call { float, float } @llvm.sincos.f32(float poison)
``````````
</details>
https://github.com/llvm/llvm-project/pull/165970
More information about the llvm-commits
mailing list