[llvm] [AArch64][GISel] Fix lowering of fp16 intrinsics (PR #130156)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 10:18:24 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
This addresses the issue described in https://github.com/llvm/llvm-project/issues/128843
---
Full diff: https://github.com/llvm/llvm-project/pull/130156.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+63-18)
- (modified) llvm/test/CodeGen/AArch64/arm64-vabs.ll (-4)
- (modified) llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll (+71-30)
- (modified) llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll (+111-46)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index d9c558819db3d..f11eb7934814c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -466,6 +466,18 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
case Intrinsic::aarch64_neon_fminv:
case Intrinsic::aarch64_neon_fmaxnmv:
case Intrinsic::aarch64_neon_fminnmv:
+ case Intrinsic::aarch64_neon_fmax:
+ case Intrinsic::aarch64_neon_fmin:
+ case Intrinsic::aarch64_neon_fmulx:
+ case Intrinsic::aarch64_neon_frecpe:
+ case Intrinsic::aarch64_neon_frecps:
+ case Intrinsic::aarch64_neon_frecpx:
+ case Intrinsic::aarch64_neon_frsqrte:
+ case Intrinsic::aarch64_neon_frsqrts:
+ case Intrinsic::aarch64_neon_facge:
+ case Intrinsic::aarch64_neon_facgt:
+ case Intrinsic::aarch64_neon_fabd:
+ case Intrinsic::aarch64_sisd_fabd:
return true;
case Intrinsic::aarch64_neon_saddlv: {
const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
@@ -540,6 +552,24 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_LROUND:
case TargetOpcode::G_LLROUND:
return true;
+ case TargetOpcode::G_INTRINSIC:
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_fcvtas:
+ case Intrinsic::aarch64_neon_fcvtau:
+ case Intrinsic::aarch64_neon_fcvtzs:
+ case Intrinsic::aarch64_neon_fcvtzu:
+ case Intrinsic::aarch64_neon_fcvtms:
+ case Intrinsic::aarch64_neon_fcvtmu:
+ case Intrinsic::aarch64_neon_fcvtns:
+ case Intrinsic::aarch64_neon_fcvtnu:
+ case Intrinsic::aarch64_neon_fcvtps:
+ case Intrinsic::aarch64_neon_fcvtpu:
+      // Force FPR register bank for half types, as those types otherwise
+      // don't get legalized correctly, resulting in fp16 <-> gpr32 COPYs.
+      return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
+ default:
+ break;
+ }
default:
break;
}
@@ -1082,24 +1112,39 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
- // Check if we know that the intrinsic has any constraints on its register
- // banks. If it does, then update the mapping accordingly.
- unsigned Idx = 0;
- if (onlyDefinesFP(MI, MRI, TRI))
- for (const auto &Op : MI.defs()) {
- if (Op.isReg())
- OpRegBankIdx[Idx] = PMI_FirstFPR;
- ++Idx;
- }
- else
- Idx += MI.getNumExplicitDefs();
-
- if (onlyUsesFP(MI, MRI, TRI))
- for (const auto &Op : MI.explicit_uses()) {
- if (Op.isReg())
- OpRegBankIdx[Idx] = PMI_FirstFPR;
- ++Idx;
- }
+ switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_vcvtfxs2fp:
+ case Intrinsic::aarch64_neon_vcvtfxu2fp:
+ case Intrinsic::aarch64_neon_vcvtfp2fxs:
+ case Intrinsic::aarch64_neon_vcvtfp2fxu:
+      // Override these intrinsics, because they would otherwise get a
+      // partial mapping. This is needed for 'half' types, which otherwise
+      // don't get legalized correctly.
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ OpRegBankIdx[2] = PMI_FirstFPR;
+ break;
+ default: {
+ // Check if we know that the intrinsic has any constraints on its register
+ // banks. If it does, then update the mapping accordingly.
+ unsigned Idx = 0;
+ if (onlyDefinesFP(MI, MRI, TRI))
+ for (const auto &Op : MI.defs()) {
+ if (Op.isReg())
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ ++Idx;
+ }
+ else
+ Idx += MI.getNumExplicitDefs();
+
+ if (onlyUsesFP(MI, MRI, TRI))
+ for (const auto &Op : MI.explicit_uses()) {
+ if (Op.isReg())
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ ++Idx;
+ }
+ break;
+ }
+ }
break;
}
case TargetOpcode::G_LROUND:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index fe4657186cd2a..7ddbdf2cf2c52 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -2,10 +2,6 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for fabds
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fabdd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd_i64
-
define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sabdl8h:
; CHECK: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
index 40d2d636b94bb..1b9895445152a 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
+; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half)
declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half)
@@ -26,11 +27,18 @@ declare half @llvm.aarch64.neon.frecpx.f16(half)
declare half @llvm.aarch64.neon.frecpe.f16(half)
define dso_local i16 @t2(half %a) {
-; CHECK-LABEL: t2:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, eq
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t2:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, eq
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t2:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, eq
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oeq half %a, 0xH0000
%vceqz = sext i1 %0 to i16
@@ -38,11 +46,18 @@ entry:
}
define dso_local i16 @t3(half %a) {
-; CHECK-LABEL: t3:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, ge
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t3:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, ge
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t3:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, ge
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oge half %a, 0xH0000
%vcgez = sext i1 %0 to i16
@@ -50,11 +65,18 @@ entry:
}
define dso_local i16 @t4(half %a) {
-; CHECK-LABEL: t4:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, gt
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t4:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, gt
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t4:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, gt
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ogt half %a, 0xH0000
%vcgtz = sext i1 %0 to i16
@@ -62,11 +84,18 @@ entry:
}
define dso_local i16 @t5(half %a) {
-; CHECK-LABEL: t5:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, ls
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t5:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, ls
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t5:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, ls
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ole half %a, 0xH0000
%vclez = sext i1 %0 to i16
@@ -74,11 +103,18 @@ entry:
}
define dso_local i16 @t6(half %a) {
-; CHECK-LABEL: t6:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, #0.0
-; CHECK-NEXT: csetm w0, mi
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t6:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, #0.0
+; SDISEL-NEXT: csetm w0, mi
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t6:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, #0.0
+; GISEL-NEXT: cset w8, mi
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp olt half %a, 0xH0000
%vcltz = sext i1 %0 to i16
@@ -136,10 +172,15 @@ entry:
}
define dso_local i16 @t16(half %a) {
-; CHECK-LABEL: t16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs w0, h0
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcvtzs w0, h0
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcvtzu w0, h0
+; GISEL-NEXT: ret
entry:
%0 = fptoui half %a to i16
ret i16 %0
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
index 36795f86e0065..5b08ef2852977 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
+; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
+
declare half @llvm.aarch64.sisd.fabd.f16(half, half)
declare half @llvm.aarch64.neon.fmax.f16(half, half)
@@ -33,11 +35,18 @@ entry:
}
define dso_local i16 @t_vceqh_f16(half %a, half %b) {
-; CHECK-LABEL: t_vceqh_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, eq
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vceqh_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, eq
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vceqh_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, eq
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oeq half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -45,11 +54,18 @@ entry:
}
define dso_local i16 @t_vcgeh_f16(half %a, half %b) {
-; CHECK-LABEL: t_vcgeh_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, ge
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vcgeh_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, ge
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vcgeh_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, ge
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp oge half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -57,11 +73,18 @@ entry:
}
define dso_local i16 @t_vcgth_f16(half %a, half %b) {
-; CHECK-LABEL: t_vcgth_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, gt
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vcgth_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, gt
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vcgth_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, gt
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ogt half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -69,11 +92,18 @@ entry:
}
define dso_local i16 @t_vcleh_f16(half %a, half %b) {
-; CHECK-LABEL: t_vcleh_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, ls
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vcleh_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, ls
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vcleh_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, ls
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp ole half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -81,11 +111,18 @@ entry:
}
define dso_local i16 @t_vclth_f16(half %a, half %b) {
-; CHECK-LABEL: t_vclth_f16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcmp h0, h1
-; CHECK-NEXT: csetm w0, mi
-; CHECK-NEXT: ret
+; SDISEL-LABEL: t_vclth_f16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fcmp h0, h1
+; SDISEL-NEXT: csetm w0, mi
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: t_vclth_f16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: fcmp h0, h1
+; GISEL-NEXT: cset w8, mi
+; GISEL-NEXT: sbfx w0, w8, #0, #1
+; GISEL-NEXT: ret
entry:
%0 = fcmp olt half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -150,11 +187,18 @@ declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1
declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1
define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_s16_1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: scvtf h0, h0, #1
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_s16_1:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: scvtf h0, h0, #1
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_s16_1:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: sxth w8, w0
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: scvtf h0, h0, #1
+; GISEL-NEXT: ret
entry:
%sext = sext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1)
@@ -162,11 +206,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_s16_16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: scvtf h0, h0, #16
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_s16_16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: scvtf h0, h0, #16
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_s16_16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: sxth w8, w0
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: scvtf h0, h0, #16
+; GISEL-NEXT: ret
entry:
%sext = sext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16)
@@ -264,11 +315,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_u16_1:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, h0, #1
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_u16_1:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: ucvtf h0, h0, #1
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_u16_1:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: and w8, w0, #0xffff
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: ucvtf h0, h0, #1
+; GISEL-NEXT: ret
entry:
%0 = zext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1)
@@ -276,11 +334,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
-; CHECK-LABEL: test_vcvth_n_f16_u16_16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, h0, #16
-; CHECK-NEXT: ret
+; SDISEL-LABEL: test_vcvth_n_f16_u16_16:
+; SDISEL: // %bb.0: // %entry
+; SDISEL-NEXT: fmov s0, w0
+; SDISEL-NEXT: ucvtf h0, h0, #16
+; SDISEL-NEXT: ret
+;
+; GISEL-LABEL: test_vcvth_n_f16_u16_16:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: and w8, w0, #0xffff
+; GISEL-NEXT: fmov s0, w8
+; GISEL-NEXT: ucvtf h0, h0, #16
+; GISEL-NEXT: ret
entry:
%0 = zext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16)
``````````
</details>
https://github.com/llvm/llvm-project/pull/130156
More information about the llvm-commits
mailing list