[llvm] [AMDGPU] Update patterns for v_cvt_flr and v_cvt_rpi (PR #177962)

Mirko Brkušanin via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 26 05:57:38 PST 2026


https://github.com/mbrkusanin created https://github.com/llvm/llvm-project/pull/177962

Support GlobalISel and support cases with the nnan flag on the instruction.
The instructions are renamed to v_cvt_floor and v_cvt_nearest on gfx11+,
so add gfx11 tests as well.


>From cbc7adae114c30c52734a9c32c405ec6a2b446f4 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 26 Jan 2026 14:22:22 +0100
Subject: [PATCH 1/2] Regenerate tests and add a test with nnan floor inst

---
 llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll | 551 ++++++++++++++++++--
 llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll | 507 ++++++++++++++++--
 2 files changed, 994 insertions(+), 64 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
index 0974ce99aee36..1a21c63387286 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
@@ -1,28 +1,169 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SAFE %s
+; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math < %s | FileCheck -check-prefix=SI-NONAN %s
+; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math -global-isel < %s | FileCheck -check-prefix=SI-NONAN-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-nans-fp-math < %s | FileCheck -check-prefix=GFX11-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-nans-fp-math -global-isel < %s | FileCheck -check-prefix=GFX11-GISEL %s
 
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.floor.f32(float) #1
 
-; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NOT: add
-; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_flr_i32_f32_0:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, s6
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_flr_i32_f32_0:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e32 v0, s6
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_flr_i32_f32_0:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, s3
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_flr_i32_f32_0:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, s2
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_flr_i32_f32_0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e32 v1, s2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_flr_i32_f32_0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %floor = call float @llvm.floor.f32(float %x) #1
   %cvt = fptosi float %floor to i32
   store i32 %cvt, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
-; SI: v_add_f32_e64 [[TMP:v[0-9]+]], s{{[0-9]+}}, 1.0
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_flr_i32_f32_1:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_add_f32_e64 v0, s6, 1.0
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_flr_i32_f32_1:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_add_f32_e64 v0, s6, 1.0
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e32 v0, v0
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_flr_i32_f32_1:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 1.0
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_flr_i32_f32_1:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_add_f32_e64 v0, s2, 1.0
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_flr_i32_f32_1:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_add_f32_e64 v0, s2, 1.0
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_flr_i32_f32_1:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 1.0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %fadd = fadd float %x, 1.0
   %floor = call float @llvm.floor.f32(float %fadd) #1
   %cvt = fptosi float %floor to i32
@@ -30,12 +171,77 @@ define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0
   ret void
 }
 
-; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs:
-; SI-NOT: add
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_flr_i32_f32_fabs:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_floor_f32_e64 v0, |s6|
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_flr_i32_f32_fabs:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e64 v0, |s6|
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_flr_i32_f32_fabs:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e64 v0, |s3|
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_flr_i32_f32_fabs:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_floor_f32_e64 v0, |s2|
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_flr_i32_f32_fabs:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e64 v1, |s2|
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_flr_i32_f32_fabs:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_floor_f32_e64 v0, |s2|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %floor = call float @llvm.floor.f32(float %x.fabs) #1
   %cvt = fptosi float %floor to i32
@@ -43,12 +249,79 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x)
   ret void
 }
 
-; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg:
-; SI-NOT: add
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_flr_i32_f32_fneg:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_floor_f32_e64 v0, -s6
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_flr_i32_f32_fneg:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e64 v0, -s6
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_flr_i32_f32_fneg:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -s3
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_flr_i32_f32_fneg:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_floor_f32_e64 v0, -s2
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_flr_i32_f32_fneg:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e64 v1, -s2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_flr_i32_f32_fneg:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -s2, -s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %x.fneg = fsub float -0.000000e+00, %x
   %floor = call float @llvm.floor.f32(float %x.fneg) #1
   %cvt = fptosi float %floor to i32
@@ -56,12 +329,79 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x)
   ret void
 }
 
-; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg:
-; SI-NOT: add
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_flr_i32_f32_fabs_fneg:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_floor_f32_e64 v0, -|s6|
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_flr_i32_f32_fabs_fneg:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e64 v0, -|s6|
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_flr_i32_f32_fabs_fneg:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -|s3|
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_flr_i32_f32_fabs_fneg:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_floor_f32_e64 v0, -|s2|
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_flr_i32_f32_fabs_fneg:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e64 v1, -|s2|
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_flr_i32_f32_fabs_fneg:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -|s2|, -|s2|
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
   %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
@@ -70,17 +410,162 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
   ret void
 }
 
-; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0:
-; SI-NOT: v_cvt_flr_i32_f32
-; SI: v_floor_f32
-; SI: v_cvt_u32_f32_e32
-; SI: s_endpgm
 define amdgpu_kernel void @no_cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: no_cvt_flr_i32_f32_0:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, s6
+; SI-SAFE-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: no_cvt_flr_i32_f32_0:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_floor_f32_e32 v0, s6
+; SI-NONAN-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: no_cvt_flr_i32_f32_0:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, s3
+; SI-NONAN-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: no_cvt_flr_i32_f32_0:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, s2
+; SI-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: no_cvt_flr_i32_f32_0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v0, s2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: no_cvt_flr_i32_f32_0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %floor = call float @llvm.floor.f32(float %x) #1
   %cvt = fptoui float %floor to i32
   store i32 %cvt, ptr addrspace(1) %out
   ret void
 }
 
+define amdgpu_kernel void @cvt_flr_i32_f32_nnan(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_flr_i32_f32_nnan:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, s6
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_flr_i32_f32_nnan:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e32 v0, s6
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_flr_i32_f32_nnan:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, s3
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_flr_i32_f32_nnan:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, s2
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_flr_i32_f32_nnan:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e32 v1, s2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_flr_i32_f32_nnan:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, s2
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+  %floor = call nnan float @llvm.floor.f32(float %x) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, ptr addrspace(1) %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
index 0203b2d4f896f..31d8b6d9da9c2 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
@@ -1,15 +1,89 @@
-; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI-SAFE %s
+; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math < %s | FileCheck -check-prefix=SI-NONAN %s
+; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math -global-isel < %s | FileCheck -check-prefix=SI-NONAN-GISEL %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-nans-fp-math < %s | FileCheck -check-prefix=GFX11-SDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-nans-fp-math -global-isel < %s | FileCheck -check-prefix=GFX11-GISEL %s
 
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.floor.f32(float) #1
 
-; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
-; SI-SAFE-NOT: v_cvt_rpi_i32_f32
-; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_rpi_i32_f32:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_add_f32_e64 v0, s6, 0.5
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_rpi_i32_f32:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_rpi_i32_f32_e32 v0, s6
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_rpi_i32_f32:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 0.5
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_rpi_i32_f32:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_rpi_i32_f32:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_nearest_i32_f32_e32 v1, s2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_rpi_i32_f32:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %fadd = fadd float %x, 0.5
   %floor = call float @llvm.floor.f32(float %fadd) #1
   %cvt = fptosi float %floor to i32
@@ -17,11 +91,81 @@ define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 {
   ret void
 }
 
-; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs:
-; SI-SAFE-NOT: v_cvt_rpi_i32_f32
-; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_rpi_i32_f32_fabs:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_add_f32_e64 v0, |s6|, 0.5
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_rpi_i32_f32_fabs:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_rpi_i32_f32_e64 v0, |s6|
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_rpi_i32_f32_fabs:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, |s3|, 0.5
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_rpi_i32_f32_fabs:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_add_f32_e64 v0, |s2|, 0.5
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_fabs:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_nearest_i32_f32_e64 v1, |s2|
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fabs:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, |s2|, 0.5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %fadd = fadd float %x.fabs, 0.5
   %floor = call float @llvm.floor.f32(float %fadd) #1
@@ -31,13 +175,87 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x)
 }
 
 ; FIXME: This doesn't work because it forms fsub 0.5, x
-; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg:
-; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
-; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}}
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_rpi_i32_f32_fneg:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_sub_f32_e64 v0, 0.5, s6
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_rpi_i32_f32_fneg:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_sub_f32_e64 v0, 0.5, s6
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e32 v0, v0
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_rpi_i32_f32_fneg:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -s3
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_rpi_i32_f32_fneg:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sub_f32_e64 v0, 0.5, s2
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_fneg:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_sub_f32_e64 v0, 0.5, s2
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fneg:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -s0, -s0
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %x.fneg = fsub float -0.000000e+00, %x
   %fadd = fadd float %x.fneg, 0.5
   %floor = call float @llvm.floor.f32(float %fadd) #1
@@ -47,15 +265,87 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x)
 }
 
 ; FIXME: This doesn't work for same reason as above
-; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg:
-; SI-SAFE-NOT: v_cvt_rpi_i32_f32
-; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
-
-; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}|
-; SI-SAFE-NOT: v_cvt_flr_i32_f32
-; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
-; SI: s_endpgm
 define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_rpi_i32_f32_fabs_fneg:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_sub_f32_e64 v0, 0.5, |s6|
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_rpi_i32_f32_fabs_fneg:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_sub_f32_e64 v0, 0.5, |s6|
+; SI-NONAN-NEXT:    v_cvt_flr_i32_f32_e32 v0, v0
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_rpi_i32_f32_fabs_fneg:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -|s3|
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_rpi_i32_f32_fabs_fneg:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_sub_f32_e64 v0, 0.5, |s2|
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_fabs_fneg:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_sub_f32_e64 v0, 0.5, |s2|
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_cvt_floor_i32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fabs_fneg:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -|s0|, -|s0|
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
   %fadd = fadd float %x.fabs.fneg, 0.5
@@ -65,13 +355,86 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
   ret void
 }
 
-; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0:
-; SI-NOT: v_cvt_rpi_i32_f32
-; SI: v_add_f32
-; SI: v_floor_f32
-; SI: v_cvt_u32_f32
-; SI: s_endpgm
 define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: no_cvt_rpi_i32_f32_0:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_add_f32_e64 v0, s6, 0.5
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: no_cvt_rpi_i32_f32_0:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_add_f32_e64 v0, s6, 0.5
+; SI-NONAN-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: no_cvt_rpi_i32_f32_0:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 0.5
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: no_cvt_rpi_i32_f32_0:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: no_cvt_rpi_i32_f32_0:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: no_cvt_rpi_i32_f32_0:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
   %fadd = fadd float %x, 0.5
   %floor = call float @llvm.floor.f32(float %fadd) #1
   %cvt = fptoui float %floor to i32
@@ -79,5 +442,87 @@ define amdgpu_kernel void @no_cvt_rpi_i32_f32_0(ptr addrspace(1) %out, float %x)
   ret void
 }
 
+define amdgpu_kernel void @cvt_rpi_i32_f32_nnan(ptr addrspace(1) %out, float %x) #0 {
+; SI-SAFE-LABEL: cvt_rpi_i32_f32_nnan:
+; SI-SAFE:       ; %bb.0:
+; SI-SAFE-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
+; SI-SAFE-NEXT:    s_mov_b32 s2, -1
+; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SAFE-NEXT:    v_add_f32_e64 v0, s6, 0.5
+; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-SAFE-NEXT:    s_endpgm
+;
+; SI-NONAN-LABEL: cvt_rpi_i32_f32_nnan:
+; SI-NONAN:       ; %bb.0:
+; SI-NONAN-NEXT:    s_load_dword s6, s[4:5], 0xb
+; SI-NONAN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-NEXT:    v_cvt_rpi_i32_f32_e32 v0, s6
+; SI-NONAN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-NEXT:    s_endpgm
+;
+; SI-NONAN-GISEL-LABEL: cvt_rpi_i32_f32_nnan:
+; SI-NONAN-GISEL:       ; %bb.0:
+; SI-NONAN-GISEL-NEXT:    s_load_dword s3, s[4:5], 0xb
+; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
+; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 0.5
+; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NONAN-GISEL-NEXT:    s_endpgm
+;
+; SI-SDAG-LABEL: cvt_rpi_i32_f32_nnan:
+; SI-SDAG:       ; %bb.0:
+; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
+; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
+; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; SI-SDAG-NEXT:    s_endpgm
+;
+; GFX11-SDAG-LABEL: cvt_rpi_i32_f32_nnan:
+; GFX11-SDAG:       ; %bb.0:
+; GFX11-SDAG-NEXT:    s_clause 0x1
+; GFX11-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT:    v_cvt_nearest_i32_f32_e32 v1, s2
+; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT:    s_endpgm
+;
+; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_nnan:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 0.5
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT:    s_endpgm
+  %fadd = fadd float %x, 0.5
+  %floor = call nnan float @llvm.floor.f32(float %fadd) #1
+  %cvt = fptosi float %floor to i32
+  store i32 %cvt, ptr addrspace(1) %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }

>From 70a859ebbf60f82d232c768698cafa69c9556d00 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 26 Jan 2026 14:37:21 +0100
Subject: [PATCH 2/2] [AMDGPU] Update patterns for v_cvt_flr and v_cvt_rpi

Support GlobalISel and handle cases where the nnan flag is set on the
instruction. The instructions are renamed to v_cvt_floor and
v_cvt_nearest on gfx11+, so add gfx11 tests as well.
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 30 +++++----
 llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll  | 51 +++++----------
 llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll  | 69 ++++++--------------
 3 files changed, 55 insertions(+), 95 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 2a99dacba52a4..774339d442b96 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -758,8 +758,11 @@ def FP_ONE : PatLeaf <
 
 def FP_HALF : PatLeaf <
   (fpimm),
-  [{return N->isExactlyValue(0.5);}]
->;
+  [{return N->isExactlyValue(0.5);}]> {
+  let GISelPredicateCode = [{
+    return MI.getOperand(1).getFPImm()->isExactlyValue(0.5);
+  }];
+}
 
 /* Generic helper patterns for intrinsics */
 /* -------------------------------------- */
@@ -806,17 +809,20 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
 
 // Special conversion patterns
 
-def cvt_rpi_i32_f32 : PatFrag <
-  (ops node:$src),
-  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
-  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
->;
+class NNanOpPat<dag frag> : PatFrag<(ops node:$src), frag,
+  [{ return N->getOperand(0).getNode()->getFlags().hasNoNaNs() ||
+            TM.Options.NoNaNsFPMath; }]> {
+  let GISelPredicateCode = [{
+    const MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
+    return (Def && Def->getFlag(MachineInstr::FmNoNans)) ||
+           MF.getTarget().Options.NoNaNsFPMath;
+  }];
+}
 
-def cvt_flr_i32_f32 : PatFrag <
-  (ops node:$src),
-  (fp_to_sint (ffloor $src)),
-  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
->;
+let GIIgnoreCopies = 1 in
+def cvt_rpi_i32_f32 : NNanOpPat<(fp_to_sint (ffloor (fadd $src, FP_HALF)))>, GISelFlags;
+
+def cvt_flr_i32_f32 : NNanOpPat<(fp_to_sint (ffloor $src))>;
 
 let AddedComplexity = 2 in {
 class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
index 1a21c63387286..d8a08240a8ba3 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
@@ -39,8 +39,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0
 ; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, s3
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_flr_i32_f32_e32 v0, s3
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -75,9 +74,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_0(ptr addrspace(1) %out, float %x) #0
 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, s2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_floor_i32_f32_e32 v0, s2
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %floor = call float @llvm.floor.f32(float %x) #1
@@ -119,8 +116,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 1.0
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_flr_i32_f32_e32 v0, v0
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -159,9 +155,8 @@ define amdgpu_kernel void @cvt_flr_i32_f32_1(ptr addrspace(1) %out, float %x) #0
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 1.0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_floor_i32_f32_e32 v0, v0
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %fadd = fadd float %x, 1.0
@@ -201,8 +196,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x)
 ; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e64 v0, |s3|
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_flr_i32_f32_e64 v0, |s3|
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -237,9 +231,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs(ptr addrspace(1) %out, float %x)
 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_floor_f32_e64 v0, |s2|
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_floor_i32_f32_e64 v0, |s2|
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -280,8 +272,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x)
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -s3
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_flr_i32_f32_e32 v0, v0
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -317,9 +308,8 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fneg(ptr addrspace(1) %out, float %x)
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -s2, -s2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_floor_i32_f32_e32 v0, v0
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %x.fneg = fsub float -0.000000e+00, %x
@@ -360,8 +350,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -|s3|
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_flr_i32_f32_e32 v0, v0
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -397,9 +386,8 @@ define amdgpu_kernel void @cvt_flr_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -|s2|, -|s2|
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT:    v_cvt_floor_i32_f32_e32 v0, v0
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -498,8 +486,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-SAFE-NEXT:    s_mov_b32 s2, -1
 ; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT:    v_floor_f32_e32 v0, s6
-; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_flr_i32_f32_e32 v0, s6
 ; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-SAFE-NEXT:    s_endpgm
 ;
@@ -520,8 +507,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, s3
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_flr_i32_f32_e32 v0, s3
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -531,8 +517,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_floor_f32_e32 v0, s2
-; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_cvt_flr_i32_f32_e32 v2, s2
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
@@ -556,9 +541,7 @@ define amdgpu_kernel void @cvt_flr_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, s2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_floor_i32_f32_e32 v0, s2
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %floor = call nnan float @llvm.floor.f32(float %x) #1
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
index 31d8b6d9da9c2..42ddb4dc373dd 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
@@ -40,9 +40,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 {
 ; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 0.5
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_rpi_i32_f32_e32 v0, s3
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -78,10 +76,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32(ptr addrspace(1) %out, float %x) #0 {
 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 0.5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_nearest_i32_f32_e32 v0, s2
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %fadd = fadd float %x, 0.5
@@ -122,9 +117,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x)
 ; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, |s3|, 0.5
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_rpi_i32_f32_e64 v0, |s3|
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -160,10 +153,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs(ptr addrspace(1) %out, float %x)
 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, |s2|, 0.5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_nearest_i32_f32_e64 v0, |s2|
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -208,9 +198,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x)
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -s3
-; SI-NONAN-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_rpi_i32_f32_e32 v0, v0
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -243,17 +231,14 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fneg(ptr addrspace(1) %out, float %x)
 ;
 ; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fneg:
 ; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -s0, -s0
-; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -s2, -s2
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cvt_nearest_i32_f32_e32 v0, v0
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %x.fneg = fsub float -0.000000e+00, %x
@@ -298,9 +283,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NONAN-GISEL-NEXT:    v_mul_f32_e64 v0, 1.0, -|s3|
-; SI-NONAN-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_rpi_i32_f32_e32 v0, v0
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -333,17 +316,14 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_fabs_fneg(ptr addrspace(1) %out, floa
 ;
 ; GFX11-GISEL-LABEL: cvt_rpi_i32_f32_fabs_fneg:
 ; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_clause 0x1
+; GFX11-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -|s0|, -|s0|
-; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_f32_e32 v0, 0.5, v0
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_max_f32_e64 v0, -|s2|, -|s2|
 ; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_cvt_nearest_i32_f32_e32 v0, v0
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -450,9 +430,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; SI-SAFE-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-SAFE-NEXT:    s_mov_b32 s2, -1
 ; SI-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SAFE-NEXT:    v_add_f32_e64 v0, s6, 0.5
-; SI-SAFE-NEXT:    v_floor_f32_e32 v0, v0
-; SI-SAFE-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-SAFE-NEXT:    v_cvt_rpi_i32_f32_e32 v0, s6
 ; SI-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-SAFE-NEXT:    s_endpgm
 ;
@@ -473,9 +451,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; SI-NONAN-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s2, -1
 ; SI-NONAN-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NONAN-GISEL-NEXT:    v_add_f32_e64 v0, s3, 0.5
-; SI-NONAN-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; SI-NONAN-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; SI-NONAN-GISEL-NEXT:    v_cvt_rpi_i32_f32_e32 v0, s3
 ; SI-NONAN-GISEL-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NONAN-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; SI-NONAN-GISEL-NEXT:    s_endpgm
@@ -485,9 +461,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; SI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; SI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT:    v_add_f32_e64 v0, s2, 0.5
-; SI-SDAG-NEXT:    v_floor_f32_e32 v0, v0
-; SI-SDAG-NEXT:    v_cvt_i32_f32_e32 v2, v0
+; SI-SDAG-NEXT:    v_cvt_rpi_i32_f32_e32 v2, s2
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
 ; SI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
 ; SI-SDAG-NEXT:    flat_store_dword v[0:1], v2
@@ -511,10 +485,7 @@ define amdgpu_kernel void @cvt_rpi_i32_f32_nnan(ptr addrspace(1) %out, float %x)
 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_add_f32_e64 v0, s2, 0.5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_floor_f32_e32 v0, v0
-; GFX11-GISEL-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-GISEL-NEXT:    v_cvt_nearest_i32_f32_e32 v0, s2
 ; GFX11-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX11-GISEL-NEXT:    s_endpgm
   %fadd = fadd float %x, 0.5



More information about the llvm-commits mailing list