[llvm] [SelectionDAG] Add `f16` soft promotion for `lrint` and `lround` (PR #152684)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 05:18:21 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
Author: Trevor Gross (tgross35)
<details>
<summary>Changes</summary>
On platforms that soft promote `half`, using `lrint` intrinsics crashes with the following:
SoftPromoteHalfOperand Op #<!-- -->0: t5: i32 = lrint t4
LLVM ERROR: Do not know how to soft promote this operator's operand!
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: /Users/tmgross/Documents/projects/llvm/llvm-build/bin/llc -mtriple=riscv32
1. Running pass 'Function Pass Manager' on module '<stdin>'.
2. Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@<!-- -->test_lrint_ixx_f16'
Resolve this by adding a soft promotion.
`SoftPromoteHalfOp_FP_TO_XINT` is reused here since it provides the correct input and output types. It is renamed to `SoftPromoteHalfOp_UnaryOp` to match `PromoteFloatOp_UnaryOp` and similar functions that are used to handle the same sets of intrinsics.
Fixes crash tests added in https://github.com/llvm/llvm-project/pull/152662 for targets that use `softPromoteHalfType`.
---
Patch is 1.53 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152684.diff
29 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (+14-3)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1-1)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll (+654)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll (+1115)
- (modified) llvm/test/CodeGen/AArch64/vector-llrint.ll (+516)
- (modified) llvm/test/CodeGen/AArch64/vector-lrint.ll (+948)
- (modified) llvm/test/CodeGen/ARM/llrint-conv.ll (+21)
- (modified) llvm/test/CodeGen/ARM/lrint-conv.ll (+21)
- (added) llvm/test/CodeGen/ARM/vector-llrint.ll (+11126)
- (added) llvm/test/CodeGen/ARM/vector-lrint.ll (+18372)
- (modified) llvm/test/CodeGen/AVR/llrint.ll (+18)
- (modified) llvm/test/CodeGen/AVR/lrint.ll (+18)
- (added) llvm/test/CodeGen/LoongArch/lrint-conv.ll (+111)
- (added) llvm/test/CodeGen/MSP430/lrint-conv.ll (+60)
- (modified) llvm/test/CodeGen/Mips/llrint-conv.ll (+18)
- (modified) llvm/test/CodeGen/Mips/lrint-conv.ll (+18)
- (modified) llvm/test/CodeGen/PowerPC/llrint-conv.ll (+32)
- (modified) llvm/test/CodeGen/PowerPC/lrint-conv.ll (+32)
- (modified) llvm/test/CodeGen/PowerPC/vector-llrint.ll (+600)
- (modified) llvm/test/CodeGen/PowerPC/vector-lrint.ll (+2332)
- (added) llvm/test/CodeGen/RISCV/lrint-conv.ll (+87)
- (added) llvm/test/CodeGen/SPARC/lrint-conv.ll (+68)
- (added) llvm/test/CodeGen/WebAssembly/lrint-conv.ll (+62)
- (modified) llvm/test/CodeGen/X86/llrint-conv.ll (+97-31)
- (modified) llvm/test/CodeGen/X86/lrint-conv-i32.ll (+105-9)
- (modified) llvm/test/CodeGen/X86/lrint-conv-i64.ll (+30-4)
- (modified) llvm/test/CodeGen/X86/vector-llrint-f16.ll (+3)
- (modified) llvm/test/CodeGen/X86/vector-llrint.ll (+1278)
- (modified) llvm/test/CodeGen/X86/vector-lrint.ll (+1921)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2cad36eff9c88..f84e6c8291cce 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -20,6 +20,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -3729,10 +3730,20 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
break;
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::LLRINT:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LROUND:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LROUND:
+ Res = SoftPromoteHalfOp_UnaryOp(N);
+ break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3811,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
}
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_UnaryOp(SDNode *N) {
EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 63544e63e1da1..8eb3cec8bc87a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -840,7 +840,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
- SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N);
SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 7f144df499be0..838aac0edcb73 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -861,3 +861,657 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
ret <32 x i64> %a
}
declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) {
+; CHECK-LABEL: llrint_v1i64_v1fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128> %x)
+ ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) {
+; CHECK-LABEL: llrint_v2i64_v2fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %a = call <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128> %x)
+ ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) {
+; CHECK-LABEL: llrint_v4i64_v4fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v3.16b
+; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #64
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #64
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128> %x)
+ ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) {
+; CHECK-LABEL: llrint_v8i64_v8fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #128
+; CHECK-NEXT: addvl sp, sp, #-2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v7.16b
+; CHECK-NEXT: stp q6, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: stp q4, q3, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT: stp q2, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #2
+; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128> %x)
+ ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) {
+; CHECK-LABEL: llrint_v16fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #256
+; CHECK-NEXT: addvl sp, sp, #-4
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #272]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT: stp q7, q6, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #288]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: stp q5, q4, [sp, #160] // 32-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #304]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: stp q3, q2, [sp, #192] // 32-byte Folded Spill
+; CHECK-NEXT: str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #320]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #336]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #352]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #368]
+; CHECK-NEXT: addvl x8, sp, #4
+; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q1, [x8, #384]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mov z3.d, z2.d
+; CHECK-NEXT: mov z5.d, z4.d
+; CHECK-NEXT: mov z7.d, z6.d
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: ext z3.b, z3.b, z2.b, #16
+; CHECK-NEXT: ext z5.b, z5.b, z4.b, #16
+; CHECK-NEXT: ext z7.b, z7.b, z6.b, #16
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT: addvl sp, sp, #4
+; CHECK-NEXT: add sp, sp, #256
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %a = call <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128> %x)
+ ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) {
+; CHECK-LABEL: llrint_v32fp128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #512
+; CHECK-NEXT: addvl sp, sp, #-8
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT: mov x19, x8
+; CHECK-NEXT: stp q0, q7, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #864]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q6, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #880]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: stp q5, q4, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT: str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #896]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #912]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #800]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #816]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #832]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #848]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #736]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #752]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #768]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #784]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [x9, #672]
+; CHECK-NEXT: addvl x9, sp, #8
+; CHECK-NEXT: str ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/152684
More information about the llvm-commits
mailing list