[llvm] [AMDGPU] Implement llvm.lrint intrinsic lowering (PR #98931)
Sumanth Gundapaneni via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 23 08:06:45 PDT 2024
https://github.com/sgundapa updated https://github.com/llvm/llvm-project/pull/98931
>From 772f639642e926d823fc65522b379f0ebc139563 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Wed, 26 Jun 2024 10:58:20 -0500
Subject: [PATCH 1/5] [AMDGPU] Implement llvm.lrint intrinsic lowering
This patch enables the target-independent lowering of llvm.lrint via
GlobalISel. For SelectionDAG, the intrinsic is custom lowered.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 13 +
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 17 +-
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 +
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 5 +
llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll | 493 ++++++++++++++++++
llvm/test/CodeGen/AMDGPU/lrint.ll | 467 +++++++++++++++++
6 files changed, 994 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/lrint.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3f1094e0ac703..c63b24caf6106 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3818,6 +3818,17 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
return Legalized;
}
+ case TargetOpcode::G_INTRINSIC_LRINT:
+ case TargetOpcode::G_INTRINSIC_LLRINT: {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ auto Round = MIRBuilder.buildIntrinsicRoundeven(SrcTy, SrcReg);
+
+ MIRBuilder.buildFPTOSI(DstReg, Round);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
@@ -4668,6 +4679,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FCEIL:
case G_FFLOOR:
case G_FRINT:
+ case G_INTRINSIC_LRINT:
+ case G_INTRINSIC_LLRINT:
case G_INTRINSIC_ROUND:
case G_INTRINSIC_ROUNDEVEN:
case G_INTRINSIC_TRUNC:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef30bf6d993fa..ef3e74c9a622f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -404,7 +404,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+ setOperationAction({ISD::FRINT, ISD::LRINT, ISD::LLRINT},
+ {MVT::f16, MVT::f32, MVT::f64}, Custom);
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
@@ -1388,7 +1389,11 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
- case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::LRINT:
+ case ISD::LLRINT:
+ return LowerLRINT(Op, DAG);
+ case ISD::FNEARBYINT:
+ return LowerFNEARBYINT(Op, DAG);
case ISD::FROUNDEVEN:
return LowerFROUNDEVEN(Op, DAG);
case ISD::FROUND: return LowerFROUND(Op, DAG);
@@ -2496,6 +2501,14 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
}
+SDValue AMDGPUTargetLowering::LowerLRINT(SDValue Op, SelectionDAG &DAG) const {
+ auto ResVT = Op.getValueType();
+ auto Arg = Op.getOperand(0u);
+ auto ArgVT = Arg.getValueType();
+ SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), ArgVT, Arg);
+ return DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op), ResVT, RoundNode);
+}
+
// XXX - May require not supporting f32 denormals?
// Don't handle v2f16. The extra instructions to scalarize and repack around the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 37572af3897f2..2e8f857e95a2d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -55,6 +55,7 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 88e40da110555..0622690759c35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1141,6 +1141,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.lower();
+ getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
+ .clampScalar(0, S16, S64)
+ .scalarize(0)
+ .lower();
+
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(
{G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
new file mode 100644
index 0000000000000..c6ac0b2dd3334
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
@@ -0,0 +1,493 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+
+declare float @llvm.rint.f32(float)
+declare i32 @llvm.lrint.i32.f32(float)
+declare i32 @llvm.lrint.i32.f64(double)
+declare i64 @llvm.lrint.i64.f32(float)
+declare i64 @llvm.lrint.i64.f64(double)
+declare i64 @llvm.llrint.i64.f32(float)
+declare half @llvm.rint.f16(half)
+declare i32 @llvm.lrint.i32.f16(half %arg)
+declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+
+define float @intrinsic_frint(float %arg) {
+; GCN-LABEL: intrinsic_frint:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_rndne_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call float @llvm.rint.f32(float %arg)
+ ret float %0
+}
+
+define i32 @intrinsic_lrint_i32_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
+ ret i32 %0
+}
+
+define i32 @intrinsic_lrint_i32_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f64:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f64:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
+ ret i32 %0
+}
+
+define i64 @intrinsic_lrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_trunc_f32_e32 v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x2f800000
+; GFX9-NEXT: v_mul_f32_e64 v2, |v1|, v2
+; GFX9-NEXT: v_floor_f32_e32 v2, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xcf800000
+; GFX9-NEXT: v_fma_f32 v1, v2, v3, |v1|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT: v_xor_b32_e32 v1, v2, v3
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_trunc_f32_e32 v1, v0
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-NEXT: v_floor_f32_e32 v2, v2
+; GFX10-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_trunc_f32_e32 v1, v0
+; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_floor_f32_e32 v2, v2
+; GFX11-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
+ ret i64 %0
+}
+
+define i64 @intrinsic_lrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f64:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3df00000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f64:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
+ ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_trunc_f32_e32 v1, v0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x2f800000
+; GFX9-NEXT: v_mul_f32_e64 v2, |v1|, v2
+; GFX9-NEXT: v_floor_f32_e32 v2, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, 0xcf800000
+; GFX9-NEXT: v_fma_f32 v1, v2, v3, |v1|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT: v_xor_b32_e32 v1, v2, v3
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_trunc_f32_e32 v1, v0
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-NEXT: v_floor_f32_e32 v2, v2
+; GFX10-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_trunc_f32_e32 v1, v0
+; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_floor_f32_e32 v2, v2
+; GFX11-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
+ ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f64:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x3df00000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f64:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
+ ret i64 %0
+}
+
+define half @intrinsic_frint_half(half %arg) {
+; GCN-LABEL: intrinsic_frint_half:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_rndne_f16_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call half @llvm.rint.f16(half %arg)
+ ret half %0
+}
+
+define i32 @intrinsic_lrint_i32_f16(half %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
+ ret i32 %0
+}
+
+define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
+; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_rndne_f32_e32 v0, v0
+; GCN-NEXT: v_rndne_f32_e32 v1, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+ ret <2 x float> %0
+}
+
+define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_rndne_f32_e32 v1, v1
+; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_rndne_f32_e32 v1, v1
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: v_rndne_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+ ret <2 x i32> %0
+}
+
+define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_trunc_f32_e32 v2, v0
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x2f800000
+; GFX9-NEXT: v_mul_f32_e64 v4, |v2|, v3
+; GFX9-NEXT: v_floor_f32_e32 v4, v4
+; GFX9-NEXT: v_mov_b32_e32 v5, 0xcf800000
+; GFX9-NEXT: v_fma_f32 v2, v4, v5, |v2|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4
+; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v0
+; GFX9-NEXT: v_xor_b32_e32 v0, v2, v6
+; GFX9-NEXT: v_xor_b32_e32 v2, v4, v6
+; GFX9-NEXT: v_rndne_f32_e32 v4, v1
+; GFX9-NEXT: v_trunc_f32_e32 v1, v4
+; GFX9-NEXT: v_mul_f32_e64 v3, |v1|, v3
+; GFX9-NEXT: v_floor_f32_e32 v3, v3
+; GFX9-NEXT: v_fma_f32 v1, v3, v5, |v1|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v6
+; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v4
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
+; GFX9-NEXT: v_xor_b32_e32 v2, v5, v4
+; GFX9-NEXT: v_xor_b32_e32 v3, v3, v4
+; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v4
+; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_rndne_f32_e32 v1, v1
+; GFX10-NEXT: v_trunc_f32_e32 v2, v0
+; GFX10-NEXT: v_trunc_f32_e32 v3, v1
+; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v0
+; GFX10-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX10-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX10-NEXT: v_floor_f32_e32 v4, v4
+; GFX10-NEXT: v_floor_f32_e32 v5, v5
+; GFX10-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX10-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v1
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v4
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v5
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v6
+; GFX10-NEXT: v_xor_b32_e32 v2, v2, v6
+; GFX10-NEXT: v_xor_b32_e32 v5, v0, v3
+; GFX10-NEXT: v_xor_b32_e32 v4, v4, v3
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: v_rndne_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_trunc_f32_e32 v2, v0
+; GFX11-NEXT: v_trunc_f32_e32 v3, v1
+; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX11-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_floor_f32_e32 v4, v4
+; GFX11-NEXT: v_floor_f32_e32 v5, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX11-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v4
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: v_cvt_u32_f32_e32 v4, v5
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_xor_b32_e32 v1, v1, v6
+; GFX11-NEXT: v_xor_b32_e32 v2, v2, v6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_xor_b32_e32 v5, v0, v3
+; GFX11-NEXT: v_xor_b32_e32 v4, v4, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+ ret <2 x i64> %0
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/lrint.ll b/llvm/test/CodeGen/AMDGPU/lrint.ll
new file mode 100644
index 0000000000000..2da1bd095165b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lrint.ll
@@ -0,0 +1,467 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+
+declare float @llvm.rint.f32(float)
+declare i32 @llvm.lrint.i32.f32(float)
+declare i32 @llvm.lrint.i32.f64(double)
+declare i64 @llvm.lrint.i64.f32(float)
+declare i64 @llvm.lrint.i64.f64(double)
+declare i64 @llvm.llrint.i64.f32(float)
+declare half @llvm.rint.f16(half)
+declare i32 @llvm.lrint.i32.f16(half %arg)
+declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+
+define float @intrinsic_frint(float %arg) {
+; GCN-LABEL: intrinsic_frint:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_rndne_f32_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call float @llvm.rint.f32(float %arg)
+ ret float %0
+}
+
+define i32 @intrinsic_lrint_i32_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
+ ret i32 %0
+}
+
+define i32 @intrinsic_lrint_i32_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f64:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f64:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
+ ret i32 %0
+}
+
+define i64 @intrinsic_lrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: s_mov_b32 s4, 0x2f800000
+; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX9-NEXT: v_floor_f32_e32 v1, v1
+; GFX9-NEXT: s_mov_b32 s4, 0xcf800000
+; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v1
+; GFX9-NEXT: v_fma_f32 v1, v1, s4, |v0|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT: v_xor_b32_e32 v2, v2, v3
+; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT: v_floor_f32_e32 v1, v1
+; GFX10-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT: v_floor_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
+ ret i64 %0
+}
+
+define i64 @intrinsic_lrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f64:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: s_movk_i32 s4, 0xffe0
+; GFX9-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-NEXT: s_mov_b32 s4, 0
+; GFX9-NEXT: s_mov_b32 s5, 0xc1f00000
+; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f64:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
+ ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: s_mov_b32 s4, 0x2f800000
+; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX9-NEXT: v_floor_f32_e32 v1, v1
+; GFX9-NEXT: s_mov_b32 s4, 0xcf800000
+; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v1
+; GFX9-NEXT: v_fma_f32 v1, v1, s4, |v0|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT: v_xor_b32_e32 v2, v2, v3
+; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT: v_floor_f32_e32 v1, v1
+; GFX10-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT: v_floor_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
+ ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f64:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT: s_movk_i32 s4, 0xffe0
+; GFX9-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-NEXT: s_mov_b32 s4, 0
+; GFX9-NEXT: s_mov_b32 s5, 0xc1f00000
+; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f64:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f64:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
+ ret i64 %0
+}
+
+define half @intrinsic_frint_half(half %arg) {
+; GCN-LABEL: intrinsic_frint_half:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_rndne_f16_e32 v0, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call half @llvm.rint.f16(half %arg)
+ ret half %0
+}
+
+define i32 @intrinsic_lrint_i32_f16(half %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f16_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
+ ret i32 %0
+}
+
+define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
+; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_rndne_f32_e32 v0, v0
+; GCN-NEXT: v_rndne_f32_e32 v1, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+ ret <2 x float> %0
+}
+
+define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: v_rndne_f32_e32 v1, v1
+; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_rndne_f32_e32 v1, v1
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: v_rndne_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+ ret <2 x i32> %0
+}
+
+define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-NEXT: s_mov_b32 s4, 0x2f800000
+; GFX9-NEXT: v_mul_f32_e64 v2, |v0|, s4
+; GFX9-NEXT: v_floor_f32_e32 v2, v2
+; GFX9-NEXT: s_mov_b32 s5, 0xcf800000
+; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v2
+; GFX9-NEXT: v_fma_f32 v2, v2, s5, |v0|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v0
+; GFX9-NEXT: v_xor_b32_e32 v3, v3, v4
+; GFX9-NEXT: v_xor_b32_e32 v0, v2, v4
+; GFX9-NEXT: v_rndne_f32_e32 v2, v1
+; GFX9-NEXT: v_mul_f32_e64 v1, |v2|, s4
+; GFX9-NEXT: v_floor_f32_e32 v1, v1
+; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v1
+; GFX9-NEXT: v_fma_f32 v1, v1, s5, |v2|
+; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v1
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
+; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; GFX9-NEXT: v_xor_b32_e32 v2, v6, v3
+; GFX9-NEXT: v_xor_b32_e32 v4, v5, v3
+; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-NEXT: v_rndne_f32_e32 v1, v1
+; GFX10-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX10-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v0
+; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GFX10-NEXT: v_floor_f32_e32 v2, v2
+; GFX10-NEXT: v_floor_f32_e32 v3, v3
+; GFX10-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX10-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v4
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT: v_xor_b32_e32 v2, v2, v5
+; GFX10-NEXT: v_xor_b32_e32 v3, v3, v6
+; GFX10-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX10-NEXT: v_xor_b32_e32 v4, v0, v6
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-NEXT: v_rndne_f32_e32 v1, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX11-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v0
+; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_floor_f32_e32 v2, v2
+; GFX11-NEXT: v_floor_f32_e32 v3, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX11-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v4
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_xor_b32_e32 v2, v2, v5
+; GFX11-NEXT: v_xor_b32_e32 v3, v3, v6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX11-NEXT: v_xor_b32_e32 v4, v0, v6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+ ret <2 x i64> %0
+}
+
>From f08e460eb5e45fee48a74fea085a83325a82d1c5 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Tue, 16 Jul 2024 11:50:00 -0500
Subject: [PATCH 2/5] [AMDGPU] Implement llvm.lrint intrinsic lowering.
Unless a target marks these nodes (LRINT/LLRINT) as Expand, they default to
being lowered to a library call.
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 10 ++++++++++
llvm/lib/CodeGen/TargetLoweringBase.cpp | 7 +++++--
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 6 ++++--
3 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d6a0dd9ae9b20..357826ba418e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4326,6 +4326,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// targets where it is not needed.
Results.push_back(Node->getOperand(0));
break;
+ case ISD::LRINT:
+ case ISD::LLRINT: {
+ SDValue Arg = Node->getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ EVT ResVT = Node->getValueType(0);
+ SDLoc dl(Node);
+ SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, dl, ArgVT, Arg);
+ Results.push_back(DAG.getNode(ISD::FP_TO_SINT, dl, ResVT, RoundNode));
+ break;
+ }
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 353b0cafbd72e..210e09ba6485f 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1019,10 +1019,13 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
- ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN,
- ISD::FTAN},
+ ISD::LLROUND, ISD::FROUNDEVEN, ISD::FTAN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
+ // Unless the target expands, default LRINT to LibCall.
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f32, MVT::f64, MVT::f128},
+ LibCall);
+
setOperationAction(ISD::FTAN, MVT::f16, Promote);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef3e74c9a622f..ebef6f6ad785c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -404,8 +404,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
- setOperationAction({ISD::FRINT, ISD::LRINT, ISD::LLRINT},
- {MVT::f16, MVT::f32, MVT::f64}, Custom);
+ setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
+ Expand);
setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
>From 608d85b0ceae61fbfe1b72b057732fd48625b4ed Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Tue, 16 Jul 2024 12:53:33 -0500
Subject: [PATCH 3/5] Fix clang-format error.
---
llvm/lib/CodeGen/TargetLoweringBase.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index cb4f7a7598f4c..96ad500d327a0 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -793,15 +793,15 @@ void TargetLoweringBase::initActions() {
ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
ISD::FSINH, ISD::FTANH},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
-
+
// Unless the target expands, default LRINT to LibCall.
setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f32, MVT::f64, MVT::f128},
LibCall);
-
+
setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
ISD::FSINH, ISD::FTANH},
MVT::f16, Promote);
- // Default ISD::TRAP to expand (which turns it into abort).
+ // Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
// On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
>From 2f66f98ac61910d9e542df4eb18796feb208756d Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Thu, 18 Jul 2024 15:27:26 -0500
Subject: [PATCH 4/5] Eliminate dead code
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 14 +-------------
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 -
2 files changed, 1 insertion(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1a295f298923d..2a243f4fc2b07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1385,11 +1385,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FCEIL: return LowerFCEIL(Op, DAG);
case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
case ISD::FRINT: return LowerFRINT(Op, DAG);
- case ISD::LRINT:
- case ISD::LLRINT:
- return LowerLRINT(Op, DAG);
- case ISD::FNEARBYINT:
- return LowerFNEARBYINT(Op, DAG);
+ case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
case ISD::FROUNDEVEN:
return LowerFROUNDEVEN(Op, DAG);
case ISD::FROUND: return LowerFROUND(Op, DAG);
@@ -2497,14 +2493,6 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
}
-SDValue AMDGPUTargetLowering::LowerLRINT(SDValue Op, SelectionDAG &DAG) const {
- auto ResVT = Op.getValueType();
- auto Arg = Op.getOperand(0u);
- auto ArgVT = Arg.getValueType();
- SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), ArgVT, Arg);
- return DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op), ResVT, RoundNode);
-}
-
// XXX - May require not supporting f32 denormals?
// Don't handle v2f16. The extra instructions to scalarize and repack around the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 2e8f857e95a2d..37572af3897f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -55,7 +55,6 @@ class AMDGPUTargetLowering : public TargetLowering {
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerLRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
>From 7448a22f333dbbb61b8d2fdb51717eb6f36ad5d4 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Tue, 23 Jul 2024 10:06:20 -0500
Subject: [PATCH 5/5] Address comments
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 3 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +-
llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +-
llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll | 493 ----------
llvm/test/CodeGen/AMDGPU/lrint.ll | 878 ++++++++++++------
5 files changed, 594 insertions(+), 784 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 973ad218e81ea..ab9593d291471 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3894,8 +3894,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- auto Round = MIRBuilder.buildIntrinsicRoundeven(SrcTy, SrcReg);
-
+ auto Round = MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
MIRBuilder.buildFPTOSI(DstReg, Round);
MI.eraseFromParent();
return Legalized;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d8bbb3402600a..20c19dad3f86d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4332,7 +4332,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
EVT ArgVT = Arg.getValueType();
EVT ResVT = Node->getValueType(0);
SDLoc dl(Node);
- SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, dl, ArgVT, Arg);
+ SDValue RoundNode = DAG.getNode(ISD::FRINT, dl, ArgVT, Arg);
Results.push_back(DAG.getNode(ISD::FP_TO_SINT, dl, ResVT, RoundNode));
break;
}
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 96ad500d327a0..aa1f681c377f4 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -794,7 +794,7 @@ void TargetLoweringBase::initActions() {
ISD::FSINH, ISD::FTANH},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
- // Unless the target expands, default LRINT to LibCall.
+ // FIXME: Query RuntimeLibCalls to make the decision.
setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f32, MVT::f64, MVT::f128},
LibCall);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
deleted file mode 100644
index c6ac0b2dd3334..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
+++ /dev/null
@@ -1,493 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-
-declare float @llvm.rint.f32(float)
-declare i32 @llvm.lrint.i32.f32(float)
-declare i32 @llvm.lrint.i32.f64(double)
-declare i64 @llvm.lrint.i64.f32(float)
-declare i64 @llvm.lrint.i64.f64(double)
-declare i64 @llvm.llrint.i64.f32(float)
-declare half @llvm.rint.f16(half)
-declare i32 @llvm.lrint.i32.f16(half %arg)
-declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
-declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
-declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
-
-define float @intrinsic_frint(float %arg) {
-; GCN-LABEL: intrinsic_frint:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_rndne_f32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call float @llvm.rint.f32(float %arg)
- ret float %0
-}
-
-define i32 @intrinsic_lrint_i32_f32(float %arg) {
-; GFX9-LABEL: intrinsic_lrint_i32_f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i32_f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i32_f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
- ret i32 %0
-}
-
-define i32 @intrinsic_lrint_i32_f64(double %arg) {
-; GFX9-LABEL: intrinsic_lrint_i32_f64:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i32_f64:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i32_f64:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
- ret i32 %0
-}
-
-define i64 @intrinsic_lrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: v_trunc_f32_e32 v1, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x2f800000
-; GFX9-NEXT: v_mul_f32_e64 v2, |v1|, v2
-; GFX9-NEXT: v_floor_f32_e32 v2, v2
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xcf800000
-; GFX9-NEXT: v_fma_f32 v1, v2, v3, |v1|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT: v_xor_b32_e32 v1, v2, v3
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i64_f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_trunc_f32_e32 v1, v0
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX10-NEXT: v_floor_f32_e32 v2, v2
-; GFX10-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v1
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v2
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i64_f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_trunc_f32_e32 v1, v0
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_floor_f32_e32 v2, v2
-; GFX11-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v1
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v2
-; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
- ret i64 %0
-}
-
-define i64 @intrinsic_lrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f64:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3df00000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 0xc1f00000
-; GFX9-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i64_f64:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i64_f64:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
- ret i64 %0
-}
-
-define i64 @intrinsic_llrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: v_trunc_f32_e32 v1, v0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0x2f800000
-; GFX9-NEXT: v_mul_f32_e64 v2, |v1|, v2
-; GFX9-NEXT: v_floor_f32_e32 v2, v2
-; GFX9-NEXT: v_mov_b32_e32 v3, 0xcf800000
-; GFX9-NEXT: v_fma_f32 v1, v2, v3, |v1|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT: v_xor_b32_e32 v1, v2, v3
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_llrint_i64_f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_trunc_f32_e32 v1, v0
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX10-NEXT: v_floor_f32_e32 v2, v2
-; GFX10-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v1
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v2
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_llrint_i64_f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_trunc_f32_e32 v1, v0
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_floor_f32_e32 v2, v2
-; GFX11-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v1
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v2
-; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
- ret i64 %0
-}
-
-define i64 @intrinsic_llrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f64:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x3df00000
-; GFX9-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-NEXT: v_mov_b32_e32 v5, 0xc1f00000
-; GFX9-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_llrint_i64_f64:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_llrint_i64_f64:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
- ret i64 %0
-}
-
-define half @intrinsic_frint_half(half %arg) {
-; GCN-LABEL: intrinsic_frint_half:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_rndne_f16_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call half @llvm.rint.f16(half %arg)
- ret half %0
-}
-
-define i32 @intrinsic_lrint_i32_f16(half %arg) {
-; GFX9-LABEL: intrinsic_lrint_i32_f16:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f16_e32 v0, v0
-; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i32_f16:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f16_e32 v0, v0
-; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i32_f16:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f16_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
- ret i32 %0
-}
-
-define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
-; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_rndne_f32_e32 v0, v0
-; GCN-NEXT: v_rndne_f32_e32 v1, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
- ret <2 x float> %0
-}
-
-define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
-; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: v_rndne_f32_e32 v1, v1
-; GFX9-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_rndne_f32_e32 v1, v1
-; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: v_rndne_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
- ret <2 x i32> %0
-}
-
-define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
-; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: v_trunc_f32_e32 v2, v0
-; GFX9-NEXT: v_mov_b32_e32 v3, 0x2f800000
-; GFX9-NEXT: v_mul_f32_e64 v4, |v2|, v3
-; GFX9-NEXT: v_floor_f32_e32 v4, v4
-; GFX9-NEXT: v_mov_b32_e32 v5, 0xcf800000
-; GFX9-NEXT: v_fma_f32 v2, v4, v5, |v2|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT: v_cvt_u32_f32_e32 v4, v4
-; GFX9-NEXT: v_ashrrev_i32_e32 v6, 31, v0
-; GFX9-NEXT: v_xor_b32_e32 v0, v2, v6
-; GFX9-NEXT: v_xor_b32_e32 v2, v4, v6
-; GFX9-NEXT: v_rndne_f32_e32 v4, v1
-; GFX9-NEXT: v_trunc_f32_e32 v1, v4
-; GFX9-NEXT: v_mul_f32_e64 v3, |v1|, v3
-; GFX9-NEXT: v_floor_f32_e32 v3, v3
-; GFX9-NEXT: v_fma_f32 v1, v3, v5, |v1|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v1
-; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v6
-; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v4
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
-; GFX9-NEXT: v_xor_b32_e32 v2, v5, v4
-; GFX9-NEXT: v_xor_b32_e32 v3, v3, v4
-; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_rndne_f32_e32 v1, v1
-; GFX10-NEXT: v_trunc_f32_e32 v2, v0
-; GFX10-NEXT: v_trunc_f32_e32 v3, v1
-; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v0
-; GFX10-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2|
-; GFX10-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3|
-; GFX10-NEXT: v_floor_f32_e32 v4, v4
-; GFX10-NEXT: v_floor_f32_e32 v5, v5
-; GFX10-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2|
-; GFX10-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3|
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v1
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v2
-; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v4
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v4, v5
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v6
-; GFX10-NEXT: v_xor_b32_e32 v2, v2, v6
-; GFX10-NEXT: v_xor_b32_e32 v5, v0, v3
-; GFX10-NEXT: v_xor_b32_e32 v4, v4, v3
-; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: v_rndne_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_trunc_f32_e32 v2, v0
-; GFX11-NEXT: v_trunc_f32_e32 v3, v1
-; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2|
-; GFX11-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3|
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_floor_f32_e32 v4, v4
-; GFX11-NEXT: v_floor_f32_e32 v5, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2|
-; GFX11-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3|
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v2
-; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v4
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: v_cvt_u32_f32_e32 v4, v5
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_xor_b32_e32 v1, v1, v6
-; GFX11-NEXT: v_xor_b32_e32 v2, v2, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_xor_b32_e32 v5, v0, v3
-; GFX11-NEXT: v_xor_b32_e32 v4, v4, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-entry:
- %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
- ret <2 x i64> %0
-}
-
diff --git a/llvm/test/CodeGen/AMDGPU/lrint.ll b/llvm/test/CodeGen/AMDGPU/lrint.ll
index 2da1bd095165b..31e6cf6ea645c 100644
--- a/llvm/test/CodeGen/AMDGPU/lrint.ll
+++ b/llvm/test/CodeGen/AMDGPU/lrint.ll
@@ -1,7 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
declare float @llvm.rint.f32(float)
declare i32 @llvm.lrint.i32.f32(float)
@@ -22,8 +26,8 @@ define float @intrinsic_frint(float %arg) {
; GCN-NEXT: v_rndne_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call float @llvm.rint.f32(float %arg)
- ret float %0
+ %res = tail call float @llvm.rint.f32(float %arg)
+ ret float %res
}
define i32 @intrinsic_lrint_i32_f32(float %arg) {
@@ -49,8 +53,8 @@ define i32 @intrinsic_lrint_i32_f32(float %arg) {
; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
- ret i32 %0
+ %res = tail call i32 @llvm.lrint.i32.f32(float %arg)
+ ret i32 %res
}
define i32 @intrinsic_lrint_i32_f64(double %arg) {
@@ -76,212 +80,414 @@ define i32 @intrinsic_lrint_i32_f64(double %arg) {
; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
- ret i32 %0
+ %res = tail call i32 @llvm.lrint.i32.f64(double %arg)
+ ret i32 %res
}
define i64 @intrinsic_lrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x2f800000
-; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, s4
-; GFX9-NEXT: v_floor_f32_e32 v1, v1
-; GFX9-NEXT: s_mov_b32 s4, 0xcf800000
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v1
-; GFX9-NEXT: v_fma_f32 v1, v1, s4, |v0|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT: v_xor_b32_e32 v2, v2, v3
-; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f32:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x2f800000
+; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX9-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xcf800000
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v1
+; GFX9-SDAG-NEXT: v_fma_f32 v1, v1, s4, |v0|
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: intrinsic_lrint_i64_f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT: v_floor_f32_e32 v1, v1
-; GFX10-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v2
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f32:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x2f800000
+; GFX9-GISEL-NEXT: v_mul_f32_e64 v2, |v1|, v2
+; GFX9-GISEL-NEXT: v_floor_f32_e32 v2, v2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcf800000
+; GFX9-GISEL-NEXT: v_fma_f32 v1, v2, v3, |v1|
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v1, v2, v3
+; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: intrinsic_lrint_i64_f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT: v_floor_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f32:
+; GFX10-SDAG: ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX10-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f32:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0
+; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-GISEL-NEXT: v_floor_f32_e32 v2, v2
+; GFX10-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f32:
+; GFX11-SDAG: ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f32:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, v0
+; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_floor_f32_e32 v2, v2
+; GFX11-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
- ret i64 %0
+ %res = tail call i64 @llvm.lrint.i64.f32(float %arg)
+ ret i64 %res
}
define i64 @intrinsic_lrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f64:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: s_movk_i32 s4, 0xffe0
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc1f00000
-; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
-; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f64:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffe0
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc1f00000
+; GFX9-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: intrinsic_lrint_i64_f64:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f64:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: intrinsic_lrint_i64_f64:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f64:
+; GFX10-SDAG: ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f64:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f64:
+; GFX11-SDAG: ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f64:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
- ret i64 %0
+ %res = tail call i64 @llvm.lrint.i64.f64(double %arg)
+ ret i64 %res
}
define i64 @intrinsic_llrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x2f800000
-; GFX9-NEXT: v_mul_f32_e64 v1, |v0|, s4
-; GFX9-NEXT: v_floor_f32_e32 v1, v1
-; GFX9-NEXT: s_mov_b32 s4, 0xcf800000
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v1
-; GFX9-NEXT: v_fma_f32 v1, v1, s4, |v0|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT: v_xor_b32_e32 v2, v2, v3
-; GFX9-NEXT: v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f32:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x2f800000
+; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4
+; GFX9-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xcf800000
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v1
+; GFX9-SDAG-NEXT: v_fma_f32 v1, v1, s4, |v0|
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: intrinsic_llrint_i64_f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT: v_floor_f32_e32 v1, v1
-; GFX10-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v2
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f32:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x2f800000
+; GFX9-GISEL-NEXT: v_mul_f32_e64 v2, |v1|, v2
+; GFX9-GISEL-NEXT: v_floor_f32_e32 v2, v2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcf800000
+; GFX9-GISEL-NEXT: v_fma_f32 v1, v2, v3, |v1|
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v0, v1, v3
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v1, v2, v3
+; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: intrinsic_llrint_i64_f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT: v_floor_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT: v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f32:
+; GFX10-SDAG: ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX10-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f32:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0
+; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-GISEL-NEXT: v_floor_f32_e32 v2, v2
+; GFX10-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f32:
+; GFX11-SDAG: ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f32:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, v0
+; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_floor_f32_e32 v2, v2
+; GFX11-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
- ret i64 %0
+ %res = tail call i64 @llvm.llrint.i64.f32(float %arg)
+ ret i64 %res
}
define i64 @intrinsic_llrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f64:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT: s_movk_i32 s4, 0xffe0
-; GFX9-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4
-; GFX9-NEXT: s_mov_b32 s4, 0
-; GFX9-NEXT: s_mov_b32 s5, 0xc1f00000
-; GFX9-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
-; GFX9-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f64:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffe0
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0
+; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc1f00000
+; GFX9-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: intrinsic_llrint_i64_f64:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX10-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f64:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: intrinsic_llrint_i64_f64:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX11-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f64:
+; GFX10-SDAG: ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f64:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f64:
+; GFX11-SDAG: ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f64:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
- ret i64 %0
+ %res = tail call i64 @llvm.llrint.i64.f64(double %arg)
+ ret i64 %res
}
define half @intrinsic_frint_half(half %arg) {
@@ -291,8 +497,8 @@ define half @intrinsic_frint_half(half %arg) {
; GCN-NEXT: v_rndne_f16_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call half @llvm.rint.f16(half %arg)
- ret half %0
+ %res = tail call half @llvm.rint.f16(half %arg)
+ ret half %res
}
define i32 @intrinsic_lrint_i32_f16(half %arg) {
@@ -321,8 +527,8 @@ define i32 @intrinsic_lrint_i32_f16(half %arg) {
; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
- ret i32 %0
+ %res = tail call i32 @llvm.lrint.i32.f16(half %arg)
+ ret i32 %res
}
define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
@@ -333,8 +539,8 @@ define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
; GCN-NEXT: v_rndne_f32_e32 v1, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
- ret <2 x float> %0
+ %res = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+ ret <2 x float> %res
}
define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
@@ -366,102 +572,200 @@ define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
- ret <2 x i32> %0
+ %res = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+ ret <2 x i32> %res
}
define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
-; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_rndne_f32_e32 v0, v0
-; GFX9-NEXT: s_mov_b32 s4, 0x2f800000
-; GFX9-NEXT: v_mul_f32_e64 v2, |v0|, s4
-; GFX9-NEXT: v_floor_f32_e32 v2, v2
-; GFX9-NEXT: s_mov_b32 s5, 0xcf800000
-; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v2
-; GFX9-NEXT: v_fma_f32 v2, v2, s5, |v0|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v0
-; GFX9-NEXT: v_xor_b32_e32 v3, v3, v4
-; GFX9-NEXT: v_xor_b32_e32 v0, v2, v4
-; GFX9-NEXT: v_rndne_f32_e32 v2, v1
-; GFX9-NEXT: v_mul_f32_e64 v1, |v2|, s4
-; GFX9-NEXT: v_floor_f32_e32 v1, v1
-; GFX9-NEXT: v_cvt_u32_f32_e32 v5, v1
-; GFX9-NEXT: v_fma_f32 v1, v1, s5, |v2|
-; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v1
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
-; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT: v_xor_b32_e32 v2, v6, v3
-; GFX9-NEXT: v_xor_b32_e32 v4, v5, v3
-; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x2f800000
+; GFX9-SDAG-NEXT: v_mul_f32_e64 v2, |v0|, s4
+; GFX9-SDAG-NEXT: v_floor_f32_e32 v2, v2
+; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xcf800000
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v2
+; GFX9-SDAG-NEXT: v_fma_f32 v2, v2, s5, |v0|
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v4, 31, v0
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v3, v3, v4
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, v2, v4
+; GFX9-SDAG-NEXT: v_rndne_f32_e32 v2, v1
+; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, |v2|, s4
+; GFX9-SDAG-NEXT: v_floor_f32_e32 v1, v1
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v5, v1
+; GFX9-SDAG-NEXT: v_fma_f32 v1, v1, s5, |v2|
+; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v6, v1
+; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4
+; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
+; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v2, v6, v3
+; GFX9-SDAG-NEXT: v_xor_b32_e32 v4, v5, v3
+; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3
+; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX10: ; %bb.0: ; %entry
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_rndne_f32_e32 v0, v0
-; GFX10-NEXT: v_rndne_f32_e32 v1, v1
-; GFX10-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0|
-; GFX10-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1|
-; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v0
-; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; GFX10-NEXT: v_floor_f32_e32 v2, v2
-; GFX10-NEXT: v_floor_f32_e32 v3, v3
-; GFX10-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0|
-; GFX10-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1|
-; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v4
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT: v_xor_b32_e32 v2, v2, v5
-; GFX10-NEXT: v_xor_b32_e32 v3, v3, v6
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX10-NEXT: v_xor_b32_e32 v4, v0, v6
-; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
-; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6
-; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
-; GFX10-NEXT: s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX9-GISEL-NEXT: v_trunc_f32_e32 v2, v0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x2f800000
+; GFX9-GISEL-NEXT: v_mul_f32_e64 v4, |v2|, v3
+; GFX9-GISEL-NEXT: v_floor_f32_e32 v4, v4
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xcf800000
+; GFX9-GISEL-NEXT: v_fma_f32 v2, v4, v5, |v2|
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
+; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v0
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v0, v2, v6
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, v4, v6
+; GFX9-GISEL-NEXT: v_rndne_f32_e32 v4, v1
+; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v4
+; GFX9-GISEL-NEXT: v_mul_f32_e64 v3, |v1|, v3
+; GFX9-GISEL-NEXT: v_floor_f32_e32 v3, v3
+; GFX9-GISEL-NEXT: v_fma_f32 v1, v3, v5, |v1|
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v5, v1
+; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v6
+; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v4
+; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, v5, v4
+; GFX9-GISEL-NEXT: v_xor_b32_e32 v3, v3, v4
+; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v4
+; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_rndne_f32_e32 v0, v0
-; GFX11-NEXT: v_rndne_f32_e32 v1, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0|
-; GFX11-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1|
-; GFX11-NEXT: v_ashrrev_i32_e32 v5, 31, v0
-; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_floor_f32_e32 v2, v2
-; GFX11-NEXT: v_floor_f32_e32 v3, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0|
-; GFX11-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1|
-; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v4
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_xor_b32_e32 v2, v2, v5
-; GFX11-NEXT: v_xor_b32_e32 v3, v3, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX11-NEXT: v_xor_b32_e32 v4, v0, v6
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
-; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6
-; GFX11-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10-SDAG: ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX10-SDAG-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX10-SDAG-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v5, 31, v0
+; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GFX10-SDAG-NEXT: v_floor_f32_e32 v2, v2
+; GFX10-SDAG-NEXT: v_floor_f32_e32 v3, v3
+; GFX10-SDAG-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX10-SDAG-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v4
+; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v2, v2, v5
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v3, v3, v6
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX10-SDAG-NEXT: v_xor_b32_e32 v4, v0, v6
+; GFX10-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX10-SDAG-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10-GISEL: ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX10-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX10-GISEL-NEXT: v_trunc_f32_e32 v2, v0
+; GFX10-GISEL-NEXT: v_trunc_f32_e32 v3, v1
+; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v0
+; GFX10-GISEL-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX10-GISEL-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX10-GISEL-NEXT: v_floor_f32_e32 v4, v4
+; GFX10-GISEL-NEXT: v_floor_f32_e32 v5, v5
+; GFX10-GISEL-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX10-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v4
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v5
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v5, v0, v3
+; GFX10-GISEL-NEXT: v_xor_b32_e32 v4, v4, v3
+; GFX10-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX10-GISEL-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11-SDAG: ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-SDAG-NEXT: v_rndne_f32_e32 v1, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v5, 31, v0
+; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v6, 31, v1
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_floor_f32_e32 v2, v2
+; GFX11-SDAG-NEXT: v_floor_f32_e32 v3, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX11-SDAG-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v4
+; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v2, v2, v5
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v3, v3, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v5
+; GFX11-SDAG-NEXT: v_xor_b32_e32 v4, v0, v6
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX11-SDAG-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11-GISEL: ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_rndne_f32_e32 v0, v0
+; GFX11-GISEL-NEXT: v_rndne_f32_e32 v1, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, v0
+; GFX11-GISEL-NEXT: v_trunc_f32_e32 v3, v1
+; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX11-GISEL-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_floor_f32_e32 v4, v4
+; GFX11-GISEL-NEXT: v_floor_f32_e32 v5, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX11-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v4
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v5
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v5, v0, v3
+; GFX11-GISEL-NEXT: v_xor_b32_e32 v4, v4, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
- %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
- ret <2 x i64> %0
+ %res = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+ ret <2 x i64> %res
}
More information about the llvm-commits
mailing list