[llvm] BasicTTI: Cleanup multiple result intrinsic handling (PR #165970)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 31 22:37:06 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/165970
Avoid weird lambda returning function pointer and sink the libcall
logic to where the operation is handled. This allows chaining the
libcall logic to try sincos_stret and fallback to sincos. The resulting
cost seems too low.
>From b84223f393493d26c782ee7a8d7fa632bd5c1147 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 31 Oct 2025 20:33:25 -0700
Subject: [PATCH] BasicTTI: Cleanup multiple result intrinsic handling
Avoid weird lambda returning function pointer and sink the libcall
logic to where the operation is handled. This allows chaining the
libcall logic to try sincos_stret and fallback to sincos. The resulting
cost seems too low.
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 60 ++++++++++++-------
.../test/Analysis/CostModel/AArch64/sincos.ll | 21 +++++--
2 files changed, 57 insertions(+), 24 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e8dbc964a943e..bbce59b71edae 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -302,7 +302,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// (e.g. scalarization).
std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost(
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
- RTLIB::Libcall LC,
std::optional<unsigned> CallRetElementIndex = {}) const {
Type *RetTy = ICA.getReturnType();
// Vector variants of the intrinsic can be mapped to a vector library call.
@@ -311,11 +310,43 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
!isVectorizedStructTy(cast<StructType>(RetTy)))
return std::nullopt;
+ Type *Ty = getContainedTypes(RetTy).front();
+ EVT VT = getTLI()->getValueType(DL, Ty);
+
+ EVT ScalarVT = VT.getScalarType();
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+
+ bool UsesMemoryOutArgument = true;
+
+ switch (ICA.getID()) {
+ case Intrinsic::modf:
+ LC = RTLIB::getMODF(ScalarVT);
+ break;
+ case Intrinsic::sincospi:
+ LC = RTLIB::getSINCOSPI(ScalarVT);
+ break;
+ case Intrinsic::sincos:
+ LC = RTLIB::getSINCOS_STRET(ScalarVT);
+ UsesMemoryOutArgument = false;
+
+ if (getTLI()->getLibcallImpl(LC) == RTLIB::Unsupported) {
+ LC = RTLIB::getSINCOS(ScalarVT);
+ UsesMemoryOutArgument = true;
+ }
+
+ break;
+ default:
+ return std::nullopt;
+ }
+
// Find associated libcall.
- const char *LCName = getTLI()->getLibcallName(LC);
- if (!LCName)
+ RTLIB::LibcallImpl LibcallImpl = getTLI()->getLibcallImpl(LC);
+ if (LibcallImpl == RTLIB::Unsupported)
return std::nullopt;
+ StringRef LCName =
+ RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl);
+
// Search for a corresponding vector variant.
LLVMContext &Ctx = RetTy->getContext();
ElementCount VF = getVectorizedTypeVF(RetTy);
@@ -336,6 +367,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
VecTy, {}, CostKind, 0, nullptr, {});
}
+ // Technically this depends on the ABI, but assume sincos_stret passes in
+ // registers.
+ if (!UsesMemoryOutArgument)
+ return Cost;
+
// Lowering to a library call (with output pointers) may require us to emit
// reloads for the results.
for (auto [Idx, VectorTy] : enumerate(getContainedTypes(RetTy))) {
@@ -2137,22 +2173,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::modf:
case Intrinsic::sincos:
case Intrinsic::sincospi: {
- Type *Ty = getContainedTypes(RetTy).front();
- EVT VT = getTLI()->getValueType(DL, Ty);
-
- RTLIB::Libcall LC = [&] {
- switch (ICA.getID()) {
- case Intrinsic::modf:
- return RTLIB::getMODF;
- case Intrinsic::sincos:
- return RTLIB::getSINCOS;
- case Intrinsic::sincospi:
- return RTLIB::getSINCOSPI;
- default:
- llvm_unreachable("unexpected intrinsic");
- }
- }()(VT.getScalarType());
-
std::optional<unsigned> CallRetElementIndex;
// The first element of the modf result is returned by value in the
// libcall.
@@ -2160,7 +2180,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CallRetElementIndex = 0;
if (auto Cost = getMultipleResultIntrinsicVectorLibCallCost(
- ICA, CostKind, LC, CallRetElementIndex))
+ ICA, CostKind, CallRetElementIndex))
return *Cost;
// Otherwise, fallback to default scalarization cost.
break;
diff --git a/llvm/test/Analysis/CostModel/AArch64/sincos.ll b/llvm/test/Analysis/CostModel/AArch64/sincos.ll
index 32408acb582d0..72c8f2bbbf8cf 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sincos.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sincos.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "sincos"
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
; RUN: opt < %s -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL -passes="print<cost-model>" -intrinsic-cost-strategy=intrinsic-cost -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefix=CHECK-VECLIB
+; RUN: opt < %s -mtriple=arm64-apple-macos10.9 -mattr=+neon -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck -check-prefix=SINCOS_STRET %s
define void @sincos() {
; CHECK-LABEL: 'sincos'
@@ -8,13 +9,11 @@ define void @sincos() {
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
-;
; CHECK: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
; CHECK: Cost Model: Found an estimated cost of 52 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
; CHECK: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
-;
; CHECK: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
; CHECK: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
@@ -26,18 +25,32 @@ define void @sincos() {
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
-;
; CHECK-VECLIB: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 104 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
-;
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
; CHECK-VECLIB: Cost Model: Found an estimated cost of 13 for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
; CHECK-VECLIB: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
+;
+; SINCOS_STRET-LABEL: 'sincos'
+; SINCOS_STRET: Cost Model: Found an estimated cost of 1 for instruction: %f16 = call { half, half } @llvm.sincos.f16(half poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 2 for instruction: %f32 = call { float, float } @llvm.sincos.f32(float poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call { double, double } @llvm.sincos.f64(double poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %f128 = call { fp128, fp128 } @llvm.sincos.f128(fp128 poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 36 for instruction: %v8f16 = call { <8 x half>, <8 x half> } @llvm.sincos.v8f16(<8 x half> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 20 for instruction: %v4f32 = call { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 10 for instruction: %v1f128 = call { <1 x fp128>, <1 x fp128> } @llvm.sincos.v1f128(<1 x fp128> poison)
+; SINCOS_STRET: Cost Model: Found an estimated cost of 40 for instruction: %v8f32 = call { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f16 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.sincos.nxv8f16(<vscale x 8 x half> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv4f32 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincos.nxv4f32(<vscale x 4 x float> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv2f64 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincos.nxv2f64(<vscale x 2 x double> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv1f128 = call { <vscale x 1 x fp128>, <vscale x 1 x fp128> } @llvm.sincos.nxv1f128(<vscale x 1 x fp128> poison)
+; SINCOS_STRET: Cost Model: Invalid cost for instruction: %nxv8f32 = call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.sincos.nxv8f32(<vscale x 8 x float> poison)
;
%f16 = call { half, half } @llvm.sincos.f16(half poison)
%f32 = call { float, float } @llvm.sincos.f32(float poison)
More information about the llvm-commits
mailing list