[llvm] [AMDGPU] Implement llvm.lrint intrinsic lowering (PR #98931)

Sumanth Gundapaneni via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 23 08:06:45 PDT 2024


https://github.com/sgundapa updated https://github.com/llvm/llvm-project/pull/98931

>From 772f639642e926d823fc65522b379f0ebc139563 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Wed, 26 Jun 2024 10:58:20 -0500
Subject: [PATCH 1/5] [AMDGPU] Implement llvm.lrint intrinsic lowering

This patch enables the target-independent lowering of llvm.lrint via
GlobalISel. For SelectionDAG, the intrinsic is custom lowered.
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  13 +
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  17 +-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |   1 +
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   5 +
 llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll  | 493 ++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/lrint.ll             | 467 +++++++++++++++++
 6 files changed, 994 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/lrint.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3f1094e0ac703..c63b24caf6106 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3818,6 +3818,17 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
     return Legalized;
   }
+  case TargetOpcode::G_INTRINSIC_LRINT:
+  case TargetOpcode::G_INTRINSIC_LLRINT: {
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    auto Round = MIRBuilder.buildIntrinsicRoundeven(SrcTy, SrcReg);
+
+    MIRBuilder.buildFPTOSI(DstReg, Round);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
     auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
     Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
@@ -4668,6 +4679,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FCEIL:
   case G_FFLOOR:
   case G_FRINT:
+  case G_INTRINSIC_LRINT:
+  case G_INTRINSIC_LLRINT:
   case G_INTRINSIC_ROUND:
   case G_INTRINSIC_ROUNDEVEN:
   case G_INTRINSIC_TRUNC:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef30bf6d993fa..ef3e74c9a622f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -404,7 +404,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
 
-  setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+  setOperationAction({ISD::FRINT, ISD::LRINT, ISD::LLRINT},
+                     {MVT::f16, MVT::f32, MVT::f64}, Custom);
 
   setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
 
@@ -1388,7 +1389,11 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
   case ISD::FRINT: return LowerFRINT(Op, DAG);
-  case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+  case ISD::LRINT:
+  case ISD::LLRINT:
+    return LowerLRINT(Op, DAG);
+  case ISD::FNEARBYINT:
+    return LowerFNEARBYINT(Op, DAG);
   case ISD::FROUNDEVEN:
     return LowerFROUNDEVEN(Op, DAG);
   case ISD::FROUND: return LowerFROUND(Op, DAG);
@@ -2496,6 +2501,14 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
 }
 
+SDValue AMDGPUTargetLowering::LowerLRINT(SDValue Op, SelectionDAG &DAG) const {
+  auto ResVT = Op.getValueType();  // Type produced by the final FP_TO_SINT.
+  auto Arg = Op.getOperand(0u);    // Value to be rounded.
+  auto ArgVT = Arg.getValueType(); // FROUNDEVEN keeps the operand's type.
+  SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), ArgVT, Arg);
+  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op), ResVT, RoundNode);
+}
+
 // XXX - May require not supporting f32 denormals?
 
 // Don't handle v2f16. The extra instructions to scalarize and repack around the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 37572af3897f2..2e8f857e95a2d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -55,6 +55,7 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLRINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 88e40da110555..0622690759c35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1141,6 +1141,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       .scalarize(0)
       .lower();
 
+  getActionDefinitionsBuilder({G_INTRINSIC_LRINT, G_INTRINSIC_LLRINT})
+      .clampScalar(0, S16, S64)
+      .scalarize(0)
+      .lower();
+
   if (ST.has16BitInsts()) {
     getActionDefinitionsBuilder(
         {G_INTRINSIC_TRUNC, G_FCEIL, G_INTRINSIC_ROUNDEVEN})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
new file mode 100644
index 0000000000000..c6ac0b2dd3334
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
@@ -0,0 +1,493 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+
+declare float @llvm.rint.f32(float)
+declare i32 @llvm.lrint.i32.f32(float)
+declare i32 @llvm.lrint.i32.f64(double)
+declare i64 @llvm.lrint.i64.f32(float)
+declare i64 @llvm.lrint.i64.f64(double)
+declare i64 @llvm.llrint.i64.f32(float)
+declare half @llvm.rint.f16(half)
+declare i32 @llvm.lrint.i32.f16(half %arg)
+declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+
+define float @intrinsic_frint(float %arg) {
+; GCN-LABEL: intrinsic_frint:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_rndne_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call float @llvm.rint.f32(float %arg)
+  ret float %0
+}
+
+define i32 @intrinsic_lrint_i32_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
+  ret i32 %0
+}
+
+define i32 @intrinsic_lrint_i32_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f64:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
+  ret i32 %0
+}
+
+define i64 @intrinsic_lrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x2f800000
+; GFX9-NEXT:    v_mul_f32_e64 v2, |v1|, v2
+; GFX9-NEXT:    v_floor_f32_e32 v2, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0xcf800000
+; GFX9-NEXT:    v_fma_f32 v1, v2, v3, |v1|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT:    v_xor_b32_e32 v1, v2, v3
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-NEXT:    v_floor_f32_e32 v2, v2
+; GFX10-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
+  ret i64 %0
+}
+
+define i64 @intrinsic_lrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f64:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3df00000
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
+  ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x2f800000
+; GFX9-NEXT:    v_mul_f32_e64 v2, |v1|, v2
+; GFX9-NEXT:    v_floor_f32_e32 v2, v2
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0xcf800000
+; GFX9-NEXT:    v_fma_f32 v1, v2, v3, |v1|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT:    v_xor_b32_e32 v1, v2, v3
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-NEXT:    v_floor_f32_e32 v2, v2
+; GFX10-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
+  ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f64:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3df00000
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
+  ret i64 %0
+}
+
+define half @intrinsic_frint_half(half %arg) {
+; GCN-LABEL: intrinsic_frint_half:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_rndne_f16_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call half @llvm.rint.f16(half %arg)
+  ret half %0
+}
+
+define i32 @intrinsic_lrint_i32_f16(half %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f16:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
+  ret i32 %0
+}
+
+define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
+; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_rndne_f32_e32 v0, v0
+; GCN-NEXT:    v_rndne_f32_e32 v1, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+  ret <2 x float> %0
+}
+
+define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+  ret <2 x i32> %0
+}
+
+define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_trunc_f32_e32 v2, v0
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0x2f800000
+; GFX9-NEXT:    v_mul_f32_e64 v4, |v2|, v3
+; GFX9-NEXT:    v_floor_f32_e32 v4, v4
+; GFX9-NEXT:    v_mov_b32_e32 v5, 0xcf800000
+; GFX9-NEXT:    v_fma_f32 v2, v4, v5, |v2|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GFX9-NEXT:    v_xor_b32_e32 v0, v2, v6
+; GFX9-NEXT:    v_xor_b32_e32 v2, v4, v6
+; GFX9-NEXT:    v_rndne_f32_e32 v4, v1
+; GFX9-NEXT:    v_trunc_f32_e32 v1, v4
+; GFX9-NEXT:    v_mul_f32_e64 v3, |v1|, v3
+; GFX9-NEXT:    v_floor_f32_e32 v3, v3
+; GFX9-NEXT:    v_fma_f32 v1, v3, v5, |v1|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v1
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
+; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v4
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
+; GFX9-NEXT:    v_xor_b32_e32 v2, v5, v4
+; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v4
+; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX10-NEXT:    v_trunc_f32_e32 v2, v0
+; GFX10-NEXT:    v_trunc_f32_e32 v3, v1
+; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GFX10-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX10-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX10-NEXT:    v_floor_f32_e32 v4, v4
+; GFX10-NEXT:    v_floor_f32_e32 v5, v5
+; GFX10-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX10-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v4
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v4, v5
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GFX10-NEXT:    v_xor_b32_e32 v2, v2, v6
+; GFX10-NEXT:    v_xor_b32_e32 v5, v0, v3
+; GFX10-NEXT:    v_xor_b32_e32 v4, v4, v3
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_trunc_f32_e32 v2, v0
+; GFX11-NEXT:    v_trunc_f32_e32 v3, v1
+; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX11-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_floor_f32_e32 v4, v4
+; GFX11-NEXT:    v_floor_f32_e32 v5, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX11-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v4
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v4, v5
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GFX11-NEXT:    v_xor_b32_e32 v2, v2, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_xor_b32_e32 v5, v0, v3
+; GFX11-NEXT:    v_xor_b32_e32 v4, v4, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+  ret <2 x i64> %0
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/lrint.ll b/llvm/test/CodeGen/AMDGPU/lrint.ll
new file mode 100644
index 0000000000000..2da1bd095165b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lrint.ll
@@ -0,0 +1,467 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+
+declare float @llvm.rint.f32(float)
+declare i32 @llvm.lrint.i32.f32(float)
+declare i32 @llvm.lrint.i32.f64(double)
+declare i64 @llvm.lrint.i64.f32(float)
+declare i64 @llvm.lrint.i64.f64(double)
+declare i64 @llvm.llrint.i64.f32(float)
+declare half @llvm.rint.f16(half)
+declare i32 @llvm.lrint.i32.f16(half %arg)
+declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+
+define float @intrinsic_frint(float %arg) {
+; GCN-LABEL: intrinsic_frint:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_rndne_f32_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call float @llvm.rint.f32(float %arg)
+  ret float %0
+}
+
+define i32 @intrinsic_lrint_i32_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
+  ret i32 %0
+}
+
+define i32 @intrinsic_lrint_i32_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f64:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
+  ret i32 %0
+}
+
+define i64 @intrinsic_lrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    s_mov_b32 s4, 0x2f800000
+; GFX9-NEXT:    v_mul_f32_e64 v1, |v0|, s4
+; GFX9-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-NEXT:    s_mov_b32 s4, 0xcf800000
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v1
+; GFX9-NEXT:    v_fma_f32 v1, v1, s4, |v0|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v3
+; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT:    v_floor_f32_e32 v1, v1
+; GFX10-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT:    v_floor_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
+  ret i64 %0
+}
+
+define i64 @intrinsic_lrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_lrint_i64_f64:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    s_movk_i32 s4, 0xffe0
+; GFX9-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    s_mov_b32 s5, 0xc1f00000
+; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i64_f64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i64_f64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
+  ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f32(float %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    s_mov_b32 s4, 0x2f800000
+; GFX9-NEXT:    v_mul_f32_e64 v1, |v0|, s4
+; GFX9-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-NEXT:    s_mov_b32 s4, 0xcf800000
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v1
+; GFX9-NEXT:    v_fma_f32 v1, v1, s4, |v0|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v3
+; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-NEXT:    v_floor_f32_e32 v1, v1
+; GFX10-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-NEXT:    v_floor_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
+  ret i64 %0
+}
+
+define i64 @intrinsic_llrint_i64_f64(double %arg) {
+; GFX9-LABEL: intrinsic_llrint_i64_f64:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-NEXT:    s_movk_i32 s4, 0xffe0
+; GFX9-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-NEXT:    s_mov_b32 s4, 0
+; GFX9-NEXT:    s_mov_b32 s5, 0xc1f00000
+; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_llrint_i64_f64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_llrint_i64_f64:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
+  ret i64 %0
+}
+
+define half @intrinsic_frint_half(half %arg) {
+; GCN-LABEL: intrinsic_frint_half:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_rndne_f16_e32 v0, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call half @llvm.rint.f16(half %arg)
+  ret half %0
+}
+
+define i32 @intrinsic_lrint_i32_f16(half %arg) {
+; GFX9-LABEL: intrinsic_lrint_i32_f16:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_i32_f16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_i32_f16:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
+  ret i32 %0
+}
+
+define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
+; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_rndne_f32_e32 v0, v0
+; GCN-NEXT:    v_rndne_f32_e32 v1, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+  ret <2 x float> %0
+}
+
+define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+  ret <2 x i32> %0
+}
+
+define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
+; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9:       ; %bb.0: ; %entry
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-NEXT:    s_mov_b32 s4, 0x2f800000
+; GFX9-NEXT:    v_mul_f32_e64 v2, |v0|, s4
+; GFX9-NEXT:    v_floor_f32_e32 v2, v2
+; GFX9-NEXT:    s_mov_b32 s5, 0xcf800000
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v2
+; GFX9-NEXT:    v_fma_f32 v2, v2, s5, |v0|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GFX9-NEXT:    v_xor_b32_e32 v0, v2, v4
+; GFX9-NEXT:    v_rndne_f32_e32 v2, v1
+; GFX9-NEXT:    v_mul_f32_e64 v1, |v2|, s4
+; GFX9-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v1
+; GFX9-NEXT:    v_fma_f32 v1, v1, s5, |v2|
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v1
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
+; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
+; GFX9-NEXT:    v_xor_b32_e32 v2, v6, v3
+; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v3
+; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX10-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX10-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
+; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GFX10-NEXT:    v_floor_f32_e32 v2, v2
+; GFX10-NEXT:    v_floor_f32_e32 v3, v3
+; GFX10-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX10-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v4
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GFX10-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-NEXT:    v_xor_b32_e32 v4, v0, v6
+; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX11-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
+; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-NEXT:    v_floor_f32_e32 v3, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX11-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v4
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GFX11-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX11-NEXT:    v_xor_b32_e32 v4, v0, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX11-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+  ret <2 x i64> %0
+}
+

>From f08e460eb5e45fee48a74fea085a83325a82d1c5 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Tue, 16 Jul 2024 11:50:00 -0500
Subject: [PATCH 2/5] [AMDGPU] Implement llvm.lrint intrinsic lowering.

Unless the target marks this node as Expand, the intrinsic defaults to
being lowered to a library call.
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 10 ++++++++++
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  7 +++++--
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  6 ++++--
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d6a0dd9ae9b20..357826ba418e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4326,6 +4326,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     // targets where it is not needed.
     Results.push_back(Node->getOperand(0));
     break;
+  case ISD::LRINT:
+  case ISD::LLRINT: {
+    SDValue Arg = Node->getOperand(0);
+    EVT ArgVT = Arg.getValueType();
+    EVT ResVT = Node->getValueType(0);
+    SDLoc dl(Node);
+    SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, dl, ArgVT, Arg);
+    Results.push_back(DAG.getNode(ISD::FP_TO_SINT, dl, ResVT, RoundNode));
+    break;
+  }
   case ISD::GLOBAL_OFFSET_TABLE:
   case ISD::GlobalAddress:
   case ISD::GlobalTLSAddress:
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 353b0cafbd72e..210e09ba6485f 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1019,10 +1019,13 @@ void TargetLoweringBase::initActions() {
   setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
                       ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
                       ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
-                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN,
-                      ISD::FTAN},
+                      ISD::LLROUND, ISD::FROUNDEVEN, ISD::FTAN},
                      {MVT::f32, MVT::f64, MVT::f128}, Expand);
 
+  // Unless the target expands, default LRINT to LibCall.
+  setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f32, MVT::f64, MVT::f128},
+                     LibCall);
+
   setOperationAction(ISD::FTAN, MVT::f16, Promote);
   // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index ef3e74c9a622f..ebef6f6ad785c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -404,8 +404,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
 
   setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
 
-  setOperationAction({ISD::FRINT, ISD::LRINT, ISD::LLRINT},
-                     {MVT::f16, MVT::f32, MVT::f64}, Custom);
+  setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
+
+  setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},
+                     Expand);
 
   setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
 

>From 608d85b0ceae61fbfe1b72b057732fd48625b4ed Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Tue, 16 Jul 2024 12:53:33 -0500
Subject: [PATCH 3/5] Fix clang-format error.

---
 llvm/lib/CodeGen/TargetLoweringBase.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index cb4f7a7598f4c..96ad500d327a0 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -793,15 +793,15 @@ void TargetLoweringBase::initActions() {
        ISD::FTAN,  ISD::FACOS,  ISD::FASIN,  ISD::FATAN,      ISD::FCOSH,
        ISD::FSINH, ISD::FTANH},
       {MVT::f32, MVT::f64, MVT::f128}, Expand);
-  
+
   // Unless the target expands, default LRINT to LibCall.
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f32, MVT::f64, MVT::f128},
                      LibCall);
-  
+
   setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
                       ISD::FSINH, ISD::FTANH},
                      MVT::f16, Promote);
-    // Default ISD::TRAP to expand (which turns it into abort).
+  // Default ISD::TRAP to expand (which turns it into abort).
   setOperationAction(ISD::TRAP, MVT::Other, Expand);
 
   // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"

>From 2f66f98ac61910d9e542df4eb18796feb208756d Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Thu, 18 Jul 2024 15:27:26 -0500
Subject: [PATCH 4/5] Eliminate dead code

---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 14 +-------------
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  1 -
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 1a295f298923d..2a243f4fc2b07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1385,11 +1385,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
   case ISD::FRINT: return LowerFRINT(Op, DAG);
-  case ISD::LRINT:
-  case ISD::LLRINT:
-    return LowerLRINT(Op, DAG);
-  case ISD::FNEARBYINT:
-    return LowerFNEARBYINT(Op, DAG);
+  case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
   case ISD::FROUNDEVEN:
     return LowerFROUNDEVEN(Op, DAG);
   case ISD::FROUND: return LowerFROUND(Op, DAG);
@@ -2497,14 +2493,6 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
 }
 
-SDValue AMDGPUTargetLowering::LowerLRINT(SDValue Op, SelectionDAG &DAG) const {
-  auto ResVT = Op.getValueType();
-  auto Arg = Op.getOperand(0u);
-  auto ArgVT = Arg.getValueType();
-  SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), ArgVT, Arg);
-  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(Op), ResVT, RoundNode);
-}
-
 // XXX - May require not supporting f32 denormals?
 
 // Don't handle v2f16. The extra instructions to scalarize and repack around the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 2e8f857e95a2d..37572af3897f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -55,7 +55,6 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerLRINT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;

>From 7448a22f333dbbb61b8d2fdb51717eb6f36ad5d4 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Tue, 23 Jul 2024 10:06:20 -0500
Subject: [PATCH 5/5] Address comments

---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   3 +-
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   2 +-
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |   2 +-
 llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll  | 493 ----------
 llvm/test/CodeGen/AMDGPU/lrint.ll             | 878 ++++++++++++------
 5 files changed, 594 insertions(+), 784 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 973ad218e81ea..ab9593d291471 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3894,8 +3894,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
     Register DstReg = MI.getOperand(0).getReg();
     Register SrcReg = MI.getOperand(1).getReg();
     LLT SrcTy = MRI.getType(SrcReg);
-    auto Round = MIRBuilder.buildIntrinsicRoundeven(SrcTy, SrcReg);
-
+    auto Round = MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
     MIRBuilder.buildFPTOSI(DstReg, Round);
     MI.eraseFromParent();
     return Legalized;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d8bbb3402600a..20c19dad3f86d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4332,7 +4332,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     EVT ArgVT = Arg.getValueType();
     EVT ResVT = Node->getValueType(0);
     SDLoc dl(Node);
-    SDValue RoundNode = DAG.getNode(ISD::FROUNDEVEN, dl, ArgVT, Arg);
+    SDValue RoundNode = DAG.getNode(ISD::FRINT, dl, ArgVT, Arg);
     Results.push_back(DAG.getNode(ISD::FP_TO_SINT, dl, ResVT, RoundNode));
     break;
   }
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 96ad500d327a0..aa1f681c377f4 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -794,7 +794,7 @@ void TargetLoweringBase::initActions() {
        ISD::FSINH, ISD::FTANH},
       {MVT::f32, MVT::f64, MVT::f128}, Expand);
 
-  // Unless the target expands, default LRINT to LibCall.
+  // FIXME: Query RuntimeLibCalls to make the decision.
   setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f32, MVT::f64, MVT::f128},
                      LibCall);
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
deleted file mode 100644
index c6ac0b2dd3334..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lrint.ll
+++ /dev/null
@@ -1,493 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-
-declare float @llvm.rint.f32(float)
-declare i32 @llvm.lrint.i32.f32(float)
-declare i32 @llvm.lrint.i32.f64(double)
-declare i64 @llvm.lrint.i64.f32(float)
-declare i64 @llvm.lrint.i64.f64(double)
-declare i64 @llvm.llrint.i64.f32(float)
-declare half @llvm.rint.f16(half)
-declare i32 @llvm.lrint.i32.f16(half %arg)
-declare <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
-declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
-declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
-
-define float @intrinsic_frint(float %arg) {
-; GCN-LABEL: intrinsic_frint:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_rndne_f32_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call float @llvm.rint.f32(float %arg)
-  ret float %0
-}
-
-define i32 @intrinsic_lrint_i32_f32(float %arg) {
-; GFX9-LABEL: intrinsic_lrint_i32_f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i32_f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i32_f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
-  ret i32 %0
-}
-
-define i32 @intrinsic_lrint_i32_f64(double %arg) {
-; GFX9-LABEL: intrinsic_lrint_i32_f64:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i32_f64:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i32_f64:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
-  ret i32 %0
-}
-
-define i64 @intrinsic_lrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x2f800000
-; GFX9-NEXT:    v_mul_f32_e64 v2, |v1|, v2
-; GFX9-NEXT:    v_floor_f32_e32 v2, v2
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xcf800000
-; GFX9-NEXT:    v_fma_f32 v1, v2, v3, |v1|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT:    v_xor_b32_e32 v1, v2, v3
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i64_f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX10-NEXT:    v_floor_f32_e32 v2, v2
-; GFX10-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v1
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v2
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i64_f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_floor_f32_e32 v2, v2
-; GFX11-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v1
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v2
-; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
-  ret i64 %0
-}
-
-define i64 @intrinsic_lrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f64:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3df00000
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
-; GFX9-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i64_f64:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i64_f64:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
-  ret i64 %0
-}
-
-define i64 @intrinsic_llrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x2f800000
-; GFX9-NEXT:    v_mul_f32_e64 v2, |v1|, v2
-; GFX9-NEXT:    v_floor_f32_e32 v2, v2
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xcf800000
-; GFX9-NEXT:    v_fma_f32 v1, v2, v3, |v1|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT:    v_xor_b32_e32 v1, v2, v3
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_llrint_i64_f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX10-NEXT:    v_floor_f32_e32 v2, v2
-; GFX10-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v1
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v2
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_llrint_i64_f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_trunc_f32_e32 v1, v0
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_floor_f32_e32 v2, v2
-; GFX11-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v1
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v2
-; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
-  ret i64 %0
-}
-
-define i64 @intrinsic_llrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f64:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3df00000
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
-; GFX9-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
-; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
-; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_llrint_i64_f64:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_llrint_i64_f64:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
-  ret i64 %0
-}
-
-define half @intrinsic_frint_half(half %arg) {
-; GCN-LABEL: intrinsic_frint_half:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_rndne_f16_e32 v0, v0
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call half @llvm.rint.f16(half %arg)
-  ret half %0
-}
-
-define i32 @intrinsic_lrint_i32_f16(half %arg) {
-; GFX9-LABEL: intrinsic_lrint_i32_f16:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
-; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_i32_f16:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_i32_f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
-  ret i32 %0
-}
-
-define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
-; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
-; GCN:       ; %bb.0: ; %entry
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_rndne_f32_e32 v0, v0
-; GCN-NEXT:    v_rndne_f32_e32 v1, v1
-; GCN-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
-  ret <2 x float> %0
-}
-
-define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
-; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX9-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
-  ret <2 x i32> %0
-}
-
-define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
-; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v2, v0
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0x2f800000
-; GFX9-NEXT:    v_mul_f32_e64 v4, |v2|, v3
-; GFX9-NEXT:    v_floor_f32_e32 v4, v4
-; GFX9-NEXT:    v_mov_b32_e32 v5, 0xcf800000
-; GFX9-NEXT:    v_fma_f32 v2, v4, v5, |v2|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GFX9-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
-; GFX9-NEXT:    v_xor_b32_e32 v0, v2, v6
-; GFX9-NEXT:    v_xor_b32_e32 v2, v4, v6
-; GFX9-NEXT:    v_rndne_f32_e32 v4, v1
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v4
-; GFX9-NEXT:    v_mul_f32_e64 v3, |v1|, v3
-; GFX9-NEXT:    v_floor_f32_e32 v3, v3
-; GFX9-NEXT:    v_fma_f32 v1, v3, v5, |v1|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
-; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v4
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v2, v5, v4
-; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v4
-; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v4
-; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX10-NEXT:    v_trunc_f32_e32 v2, v0
-; GFX10-NEXT:    v_trunc_f32_e32 v3, v1
-; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
-; GFX10-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
-; GFX10-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
-; GFX10-NEXT:    v_floor_f32_e32 v4, v4
-; GFX10-NEXT:    v_floor_f32_e32 v5, v5
-; GFX10-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
-; GFX10-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v2
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v4
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v4, v5
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GFX10-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GFX10-NEXT:    v_xor_b32_e32 v5, v0, v3
-; GFX10-NEXT:    v_xor_b32_e32 v4, v4, v3
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_trunc_f32_e32 v2, v0
-; GFX11-NEXT:    v_trunc_f32_e32 v3, v1
-; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
-; GFX11-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_floor_f32_e32 v4, v4
-; GFX11-NEXT:    v_floor_f32_e32 v5, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
-; GFX11-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v2
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v4
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v4, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v6
-; GFX11-NEXT:    v_xor_b32_e32 v2, v2, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_xor_b32_e32 v5, v0, v3
-; GFX11-NEXT:    v_xor_b32_e32 v4, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-entry:
-  %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
-  ret <2 x i64> %0
-}
-
diff --git a/llvm/test/CodeGen/AMDGPU/lrint.ll b/llvm/test/CodeGen/AMDGPU/lrint.ll
index 2da1bd095165b..31e6cf6ea645c 100644
--- a/llvm/test/CodeGen/AMDGPU/lrint.ll
+++ b/llvm/test/CodeGen/AMDGPU/lrint.ll
@@ -1,7 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
+
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
 
 declare float @llvm.rint.f32(float)
 declare i32 @llvm.lrint.i32.f32(float)
@@ -22,8 +26,8 @@ define float @intrinsic_frint(float %arg) {
 ; GCN-NEXT:    v_rndne_f32_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call float @llvm.rint.f32(float %arg)
-  ret float %0
+  %res = tail call float @llvm.rint.f32(float %arg)
+  ret float %res
 }
 
 define i32 @intrinsic_lrint_i32_f32(float %arg) {
@@ -49,8 +53,8 @@ define i32 @intrinsic_lrint_i32_f32(float %arg) {
 ; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i32 @llvm.lrint.i32.f32(float %arg)
-  ret i32 %0
+  %res = tail call i32 @llvm.lrint.i32.f32(float %arg)
+  ret i32 %res
 }
 
 define i32 @intrinsic_lrint_i32_f64(double %arg) {
@@ -76,212 +80,414 @@ define i32 @intrinsic_lrint_i32_f64(double %arg) {
 ; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i32 @llvm.lrint.i32.f64(double %arg)
-  ret i32 %0
+  %res = tail call i32 @llvm.lrint.i32.f64(double %arg)
+  ret i32 %res
 }
 
 define i64 @intrinsic_lrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0x2f800000
-; GFX9-NEXT:    v_mul_f32_e64 v1, |v0|, s4
-; GFX9-NEXT:    v_floor_f32_e32 v1, v1
-; GFX9-NEXT:    s_mov_b32 s4, 0xcf800000
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v1
-; GFX9-NEXT:    v_fma_f32 v1, v1, s4, |v0|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v3
-; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f32:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
+; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
+; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v1
+; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s4, |v0|
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: intrinsic_lrint_i64_f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT:    v_floor_f32_e32 v1, v1
-; GFX10-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v2
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f32:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
+; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
+; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
+; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
+; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: intrinsic_lrint_i64_f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT:    v_floor_f32_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f32:
+; GFX10-SDAG:       ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f32:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f32:
+; GFX11-SDAG:       ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f32:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i64 @llvm.lrint.i64.f32(float %arg)
-  ret i64 %0
+  %res = tail call i64 @llvm.lrint.i64.f32(float %arg)
+  ret i64 %res
 }
 
 define i64 @intrinsic_lrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_lrint_i64_f64:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    s_movk_i32 s4, 0xffe0
-; GFX9-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
-; GFX9-NEXT:    s_mov_b32 s4, 0
-; GFX9-NEXT:    s_mov_b32 s5, 0xc1f00000
-; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
-; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f64:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
+; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
+; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
+; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: intrinsic_lrint_i64_f64:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f64:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: intrinsic_lrint_i64_f64:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f64:
+; GFX10-SDAG:       ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f64:
+; GFX11-SDAG:       ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f64:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i64 @llvm.lrint.i64.f64(double %arg)
-  ret i64 %0
+  %res = tail call i64 @llvm.lrint.i64.f64(double %arg)
+  ret i64 %res
 }
 
 define i64 @intrinsic_llrint_i64_f32(float %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0x2f800000
-; GFX9-NEXT:    v_mul_f32_e64 v1, |v0|, s4
-; GFX9-NEXT:    v_floor_f32_e32 v1, v1
-; GFX9-NEXT:    s_mov_b32 s4, 0xcf800000
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v1
-; GFX9-NEXT:    v_fma_f32 v1, v1, s4, |v0|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v3
-; GFX9-NEXT:    v_xor_b32_e32 v0, v1, v3
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f32:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
+; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
+; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v1
+; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s4, |v0|
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v3
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: intrinsic_llrint_i64_f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX10-NEXT:    v_floor_f32_e32 v1, v1
-; GFX10-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v2
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f32:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
+; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
+; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
+; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
+; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
+; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: intrinsic_llrint_i64_f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
-; GFX11-NEXT:    v_floor_f32_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v3
-; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f32:
+; GFX10-SDAG:       ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f32:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f32:
+; GFX11-SDAG:       ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f32:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
+; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i64 @llvm.llrint.i64.f32(float %arg)
-  ret i64 %0
+  %res = tail call i64 @llvm.llrint.i64.f32(float %arg)
+  ret i64 %res
 }
 
 define i64 @intrinsic_llrint_i64_f64(double %arg) {
-; GFX9-LABEL: intrinsic_llrint_i64_f64:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX9-NEXT:    s_movk_i32 s4, 0xffe0
-; GFX9-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
-; GFX9-NEXT:    s_mov_b32 s4, 0
-; GFX9-NEXT:    s_mov_b32 s5, 0xc1f00000
-; GFX9-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX9-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
-; GFX9-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX9-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f64:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
+; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
+; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
+; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
+; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: intrinsic_llrint_i64_f64:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX10-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX10-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX10-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f64:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
+; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
+; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: intrinsic_llrint_i64_f64:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
-; GFX11-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f64:
+; GFX10-SDAG:       ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f64:
+; GFX11-SDAG:       ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
+; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f64:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
+; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
+; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i64 @llvm.llrint.i64.f64(double %arg)
-  ret i64 %0
+  %res = tail call i64 @llvm.llrint.i64.f64(double %arg)
+  ret i64 %res
 }
 
 define half @intrinsic_frint_half(half %arg) {
@@ -291,8 +497,8 @@ define half @intrinsic_frint_half(half %arg) {
 ; GCN-NEXT:    v_rndne_f16_e32 v0, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call half @llvm.rint.f16(half %arg)
-  ret half %0
+  %res = tail call half @llvm.rint.f16(half %arg)
+  ret half %res
 }
 
 define i32 @intrinsic_lrint_i32_f16(half %arg) {
@@ -321,8 +527,8 @@ define i32 @intrinsic_lrint_i32_f16(half %arg) {
 ; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call i32 @llvm.lrint.i32.f16(half %arg)
-  ret i32 %0
+  %res = tail call i32 @llvm.lrint.i32.f16(half %arg)
+  ret i32 %res
 }
 
 define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
@@ -333,8 +539,8 @@ define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
 ; GCN-NEXT:    v_rndne_f32_e32 v1, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
-  ret <2 x float> %0
+  %res = tail call <2 x float> @llvm.rint.v2f32.v2f32(<2 x float> %arg)
+  ret <2 x float> %res
 }
 
 define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
@@ -366,102 +572,200 @@ define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
 ; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
-  ret <2 x i32> %0
+  %res = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
+  ret <2 x i32> %res
 }
 
 define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
-; GFX9-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX9-NEXT:    s_mov_b32 s4, 0x2f800000
-; GFX9-NEXT:    v_mul_f32_e64 v2, |v0|, s4
-; GFX9-NEXT:    v_floor_f32_e32 v2, v2
-; GFX9-NEXT:    s_mov_b32 s5, 0xcf800000
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v2
-; GFX9-NEXT:    v_fma_f32 v2, v2, s5, |v0|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v4
-; GFX9-NEXT:    v_xor_b32_e32 v0, v2, v4
-; GFX9-NEXT:    v_rndne_f32_e32 v2, v1
-; GFX9-NEXT:    v_mul_f32_e64 v1, |v2|, s4
-; GFX9-NEXT:    v_floor_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v1
-; GFX9-NEXT:    v_fma_f32 v1, v1, s5, |v2|
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v1
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
-; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NEXT:    v_xor_b32_e32 v2, v6, v3
-; GFX9-NEXT:    v_xor_b32_e32 v4, v5, v3
-; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9-SDAG:       ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
+; GFX9-SDAG-NEXT:    v_mul_f32_e64 v2, |v0|, s4
+; GFX9-SDAG-NEXT:    v_floor_f32_e32 v2, v2
+; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xcf800000
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v2
+; GFX9-SDAG-NEXT:    v_fma_f32 v2, v2, s5, |v0|
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v2, v4
+; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v2, v1
+; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v2|, s4
+; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v5, v1
+; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s5, |v2|
+; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v6, v1
+; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
+; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
+; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v6, v3
+; GFX9-SDAG-NEXT:    v_xor_b32_e32 v4, v5, v3
+; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
+; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX10-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
-; GFX10-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
-; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
-; GFX10-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GFX10-NEXT:    v_floor_f32_e32 v2, v2
-; GFX10-NEXT:    v_floor_f32_e32 v3, v3
-; GFX10-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
-; GFX10-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v4
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GFX10-NEXT:    v_xor_b32_e32 v3, v3, v6
-; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GFX10-NEXT:    v_xor_b32_e32 v4, v0, v6
-; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
-; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
+; GFX9-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX9-GISEL:       ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x2f800000
+; GFX9-GISEL-NEXT:    v_mul_f32_e64 v4, |v2|, v3
+; GFX9-GISEL-NEXT:    v_floor_f32_e32 v4, v4
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xcf800000
+; GFX9-GISEL-NEXT:    v_fma_f32 v2, v4, v5, |v2|
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v6
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v4, v6
+; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v4, v1
+; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v4
+; GFX9-GISEL-NEXT:    v_mul_f32_e64 v3, |v1|, v3
+; GFX9-GISEL-NEXT:    v_floor_f32_e32 v3, v3
+; GFX9-GISEL-NEXT:    v_fma_f32 v1, v3, v5, |v1|
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v1
+; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
+; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v4
+; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v5, v4
+; GFX9-GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
+; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v4
+; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: intrinsic_lrint_v2i64_v2f32:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
-; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
-; GFX11-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
-; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
-; GFX11-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_floor_f32_e32 v2, v2
-; GFX11-NEXT:    v_floor_f32_e32 v3, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
-; GFX11-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v4
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_xor_b32_e32 v2, v2, v5
-; GFX11-NEXT:    v_xor_b32_e32 v3, v3, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GFX11-NEXT:    v_xor_b32_e32 v4, v0, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
-; GFX11-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
-; GFX11-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10-SDAG:       ; %bb.0: ; %entry
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX10-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX10-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
+; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GFX10-SDAG-NEXT:    v_floor_f32_e32 v2, v2
+; GFX10-SDAG-NEXT:    v_floor_f32_e32 v3, v3
+; GFX10-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX10-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
+; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
+; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX10-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
+; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
+; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GFX10-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX10-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX10-GISEL-NEXT:    v_floor_f32_e32 v4, v4
+; GFX10-GISEL-NEXT:    v_floor_f32_e32 v5, v5
+; GFX10-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX10-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
+; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX10-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11-SDAG:       ; %bb.0: ; %entry
+; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
+; GFX11-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
+; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v2, v2
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v3, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
+; GFX11-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX11-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
+; GFX11-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
+; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
+; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
+; GFX11-GISEL:       ; %bb.0: ; %entry
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v1, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
+; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
+; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
+; GFX11-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v4, v4
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v5, v5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
+; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
+; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
+; GFX11-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
+; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
+; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
-  %0 = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
-  ret <2 x i64> %0
+  %res = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
+  ret <2 x i64> %res
 }
 



More information about the llvm-commits mailing list