[llvm] [SelectionDAG] Add `f16` soft promotion for `lrint` and `lround` (PR #152684)

Tue Sep 9 23:49:29 PDT 2025

https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/152684

>From 45e07fe946949899a607c49e1c2d2ec956856865 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 05:04:09 -0500
Subject: [PATCH] [SelectionDAG] Add `f16` soft promotion for `lrint` and
 `lround`

On platforms that soft promote `half`, using `lrint` intrinsics crashes
with the following:

    SoftPromoteHalfOperand Op #0: t5: i32 = lrint t4

    LLVM ERROR: Do not know how to soft promote this operator's operand!
    PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
    Stack dump:
    0.      Program arguments: /Users/tmgross/Documents/projects/llvm/llvm-build/bin/llc -mtriple=riscv32
    1.      Running pass 'Function Pass Manager' on module '<stdin>'.
    2.      Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@test_lrint_ixx_f16'

Resolve this by adding a soft promotion.

`SoftPromoteHalfOp_FP_TO_XINT` is reused here since it provides the
correct input and output types. It is renamed `PromoteFloatOp_UnaryOp`
to match `PromoteFloatOp_UnaryOp` and similar functions that are used to
handle the same sets of intrinsics.
---
 .../SelectionDAG/LegalizeFloatTypes.cpp       | 17 ++++--
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  2 +-
 llvm/test/CodeGen/ARM/lrint-conv.ll           | 15 +++---
 llvm/test/CodeGen/LoongArch/lrint-conv.ll     | 33 ++++++++----
 llvm/test/CodeGen/Mips/llrint-conv.ll         | 23 ++++----
 llvm/test/CodeGen/Mips/lrint-conv.ll          | 27 +++++-----
 llvm/test/CodeGen/RISCV/lrint-conv.ll         | 25 ++++++---
 llvm/test/CodeGen/X86/lrint-conv-i32.ll       | 52 ++++++++++++++++---
 8 files changed, 138 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 83bb1dfe86c6a..1a4ce333d6101 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -20,6 +20,7 @@
 
 #include "LegalizeTypes.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -3737,10 +3738,20 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
     Res = SoftPromoteHalfOp_FAKE_USE(N, OpNo);
     break;
   case ISD::FCOPYSIGN:  Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::LLRINT:
+  case ISD::LLROUND:
+  case ISD::LRINT:
+  case ISD::LROUND:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
-  case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+  case ISD::STRICT_LLRINT:
+  case ISD::STRICT_LLROUND:
+  case ISD::STRICT_LRINT:
+  case ISD::STRICT_LROUND:
+    Res = SoftPromoteHalfOp_UnaryOp(N);
+    break;
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
                         Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3819,7 +3830,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
   return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
 }
 
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_UnaryOp(SDNode *N) {
   EVT RVT = N->getValueType(0);
   bool IsStrict = N->isStrictFPOpcode();
   SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 586c3411791f9..a9f9af8b7d07e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
-  SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftPromoteHalfOp_UnaryOp(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
   SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
   SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 9aa95112af533..848b14e48f2d1 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,12 +1,15 @@
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
 
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+; SOFTFP-LABEL: testmswh_builtin:
+; SOFTFP:       bl      lrintf
+; HARDFP-LABEL: testmswh_builtin:
+; HARDFP:       bl      lrintf
+define i32 @testmswh_builtin(half %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 ; SOFTFP-LABEL: testmsws_builtin:
 ; SOFTFP:       bl      lrintf
diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
index 85de820025614..262d1c16a6486 100644
--- a/llvm/test/CodeGen/LoongArch/lrint-conv.ll
+++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
@@ -5,16 +5,31 @@
 ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32
 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64
 
-; FIXME: crash
-; define ITy @test_lrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
-;   ret ITy %res
-; }
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f16:
+; LA32:         bl lrintf
+;
+; LA64-I32-LABEL: test_lrint_ixx_f16:
+; LA64-I32:         pcaddu18i $ra, %call36(lrintf)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f16:
+; LA64-I64:         pcaddu18i $t8, %call36(lrintf)
+  %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
-; define ITy @test_llrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
-;   ret ITy %res
-; }
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f16:
+; LA32:         bl llrintf
+;
+; LA64-I32-LABEL: test_llrint_ixx_f16:
+; LA64-I32:         pcaddu18i $ra, %call36(llrintf)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f16:
+; LA64-I64:         pcaddu18i $t8, %call36(llrintf)
+  %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
 define ITy @test_lrint_ixx_f32(float %x) nounwind {
 ; LA32-LABEL: test_lrint_ixx_f32:
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index 592d40c0f65aa..8eaef5d4135bb 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -1,19 +1,18 @@
 ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
 ; RUN: llc < %s -mtriple=mips -mattr=+soft-float     | FileCheck %s
 
-; FIXME: crash
-; define signext i32 @testmswh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
-;   %conv = trunc i64 %0 to i32
-;   ret i32 %conv
-; }
+define signext i32 @testmswh(half %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
 
-; define i64 @testmsxh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
-;   ret i64 %0
-; }
+define i64 @testmsxh(half %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
+  ret i64 %0
+}
 
 define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index 6d2e392675f1c..64c5cb9ac5b07 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -1,19 +1,22 @@
 ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
 ; RUN: llc < %s -mtriple=mips -mattr=+soft-float     | FileCheck %s
 
-; FIXME: crash
-; define signext i32 @testmswh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
-;   %conv = trunc i64 %0 to i32
-;   ret i32 %conv
-; }
+define signext i32 @testmswh(half %x) {
+; CHECK-LABEL: testmswh:
+; CHECK:       jal     lrintf
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
 
-; define i64 @testmsxh(half %x) {
-; entry:
-;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
-;   ret i64 %0
-; }
+define i64 @testmsxh(half %x) {
+; CHECK-LABEL: testmsxh:
+; CHECK:       jal     lrintf
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+  ret i64 %0
+}
 
 define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll
index d3af2153588a1..ecb6bd0932ef3 100644
--- a/llvm/test/CodeGen/RISCV/lrint-conv.ll
+++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll
@@ -5,14 +5,25 @@
 ; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
 ; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
 
-; FIXME: crash
-; define ITy @test_lrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
-; }
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f16:
+; RV32:         call lrintf
+;
+; RV64-LABEL: test_lrint_ixx_f16:
+; RV64:         call lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
-; define ITy @test_llrint_ixx_f16(half %x) nounwind {
-;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
-; }
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f16:
+; RV32:         call llrintf
+;
+; RV64-LABEL: test_llrint_ixx_f16:
+; RV64:         call llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+  ret ITy %res
+}
 
 define ITy @test_lrint_ixx_f32(float %x) nounwind {
 ; RV32-LABEL: test_lrint_ixx_f32:
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 3c50aea1095f4..5c0a64f1477e6 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -7,12 +7,52 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
 
-; FIXME: crash
-; define i32 @testmswh(half %x) nounwind {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+define i32 @testmswh(half %x) nounwind {
+; X86-NOSSE-LABEL: testmswh:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    calll __extendhfsf2
+; X86-NOSSE-NEXT:    addl $4, %esp
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmswh:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    subl $8, %esp
+; X86-SSE2-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT:    movw %ax, (%esp)
+; X86-SSE2-NEXT:    calll __extendhfsf2
+; X86-SSE2-NEXT:    fstps (%esp)
+; X86-SSE2-NEXT:    calll rintf
+; X86-SSE2-NEXT:    fstps (%esp)
+; X86-SSE2-NEXT:    calll __truncsfhf2
+; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT:    movw %ax, (%esp)
+; X86-SSE2-NEXT:    calll __extendhfsf2
+; X86-SSE2-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    cvttss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    addl $8, %esp
+; X86-SSE2-NEXT:    retl
+;
+; X64-SSE-LABEL: testmswh:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    callq __extendhfsf2 at PLT
+; X64-SSE-NEXT:    callq rintf at PLT
+; X64-SSE-NEXT:    callq __truncsfhf2 at PLT
+; X64-SSE-NEXT:    callq __extendhfsf2 at PLT
+; X64-SSE-NEXT:    cvttss2si %xmm0, %eax
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 define i32 @testmsws(float %x) nounwind {
 ; X86-NOSSE-LABEL: testmsws: