[llvm-branch-commits] [llvm] AArch64: Stop changing legality rules based on sincos_stret availability (PR #165817)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 30 19:05:32 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/165817
The availability of sincos_stret should be treated as a property of the
program, not as a static property of the subtarget. The one test regression
is the 3-element vector case: a combine happens to replace the original
undef value with a non-undef value, so the 4th component is never eliminated.
Trying to avoid that particular case runs into other combine regressions, so
that is left for a later change.
From b9d0a05c767424df2698618f131bba9a6357f95f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 27 Oct 2025 14:34:43 -0700
Subject: [PATCH] AArch64: Stop changing legality rules based on sincos_stret availability
The availability of sincos_stret should be treated as a property of the
program, not as a static property of the subtarget. The one test regression
is the 3-element vector case: a combine happens to replace the original
undef value with a non-undef value, so the 4th component is never eliminated.
Trying to avoid that particular case runs into other combine regressions, so
that is left for a later change.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 25 +++--
llvm/test/CodeGen/AArch64/llvm.sincos.ll | 97 ++++++++++++-------
2 files changed, 73 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e993b26..6324d9d18e31b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1052,15 +1052,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
- getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- } else {
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- }
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
@@ -5353,20 +5347,23 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SDLoc DL(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
+ RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+ RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC);
+ if (SincosStret == RTLIB::Unsupported)
+ return SDValue();
+
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
Args.emplace_back(Arg, ArgTy);
- RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
- : RTLIB::SINCOS_STRET_F32;
- const char *LibcallName = getLibcallName(LC);
- SDValue Callee =
- DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
+ StringRef LibcallImplName = getLibcallImplName(SincosStret);
+ SDValue Callee = DAG.getExternalSymbol(LibcallImplName.data(),
+ getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
- CallingConv::ID CC = getLibcallCallingConv(LC);
+ CallingConv::ID CC = getLibcallImplCallingConv(SincosStret);
CLI.setDebugLoc(DL)
.setChain(DAG.getEntryNode())
.setLibCallee(CC, RetTy, Callee, std::move(Args));
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 21da8645b9b16..fa3ada6212894 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -374,84 +374,111 @@ define { float, float } @test_sincos_f32(float %a) nounwind {
define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) nounwind {
; CHECK-LABEL: test_sincos_v3f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #80
-; CHECK-NEXT: add x0, sp, #20
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl sincosf
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #28
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: add x19, sp, #28
-; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: add x19, sp, #44
+; CHECK-NEXT: add x20, sp, #40
; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: bl sincosf
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #44
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: add x21, sp, #44
-; CHECK-NEXT: add x22, sp, #40
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: add x21, sp, #36
+; CHECK-NEXT: add x22, sp, #32
; CHECK-NEXT: mov s0, v0.s[2]
; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldp s1, s0, [sp, #16]
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x23, sp, #28
+; CHECK-NEXT: add x24, sp, #24
+; CHECK-NEXT: mov s0, v0.s[3]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #56]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: ld1 { v0.s }[1], [x19]
; CHECK-NEXT: ld1 { v1.s }[1], [x20]
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: ld1 { v0.s }[2], [x21]
; CHECK-NEXT: ld1 { v1.s }[2], [x22]
-; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[3], [x23]
+; CHECK-NEXT: ld1 { v1.s }[3], [x24]
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
;
; NO-LIBCALL-LABEL: test_sincos_v3f32:
; NO-LIBCALL: // %bb.0:
; NO-LIBCALL-NEXT: sub sp, sp, #80
-; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
; NO-LIBCALL-NEXT: mov s8, v0.s[1]
-; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
; NO-LIBCALL-NEXT: fmov s0, s8
; NO-LIBCALL-NEXT: bl sinf
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: mov s9, v0.s[2]
; NO-LIBCALL-NEXT: fmov s0, s9
; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov s10, v0.s[3]
+; NO-LIBCALL-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v1.s[3], v0.s[0]
; NO-LIBCALL-NEXT: fmov s0, s8
-; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; NO-LIBCALL-NEXT: bl cosf
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
; NO-LIBCALL-NEXT: bl cosf
; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; NO-LIBCALL-NEXT: fmov s0, s9
; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
; NO-LIBCALL-NEXT: fmov s2, s0
-; NO-LIBCALL-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v1.s[2], v2.s[0]
+; NO-LIBCALL-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.s[3], v2.s[0]
; NO-LIBCALL-NEXT: add sp, sp, #80
; NO-LIBCALL-NEXT: ret
%result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)
More information about the llvm-branch-commits
mailing list