[llvm] 17bd806 - AMDGPU: Implement llvm.get.fpmode
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 10 00:19:27 PDT 2023
Author: Matt Arsenault
Date: 2023-09-10T10:19:19+03:00
New Revision: 17bd80601efefe52fdbbceb8a75153188bb42481
URL: https://github.com/llvm/llvm-project/commit/17bd80601efefe52fdbbceb8a75153188bb42481
DIFF: https://github.com/llvm/llvm-project/commit/17bd80601efefe52fdbbceb8a75153188bb42481.diff
LOG: AMDGPU: Implement llvm.get.fpmode
Currently s_getreg_b32 is missing the possible mode use. Really we
need separate pseudos for mode-only accesses, but leave this as a
pre-existing issue.
https://reviews.llvm.org/D152710
Added:
llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
Modified:
llvm/docs/AMDGPUUsage.rst
llvm/docs/LangRef.rst
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SOPInstructions.td
Removed:
################################################################################
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 31cafb4d5e3ae88..f733c514ffbee47 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -999,6 +999,13 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
:ref:`llvm.stacksave.p5 <int_stacksave>` Implemented, must use the alloca address space.
:ref:`llvm.stackrestore.p5 <int_stackrestore>` Implemented, must use the alloca address space.
+ :ref:`llvm.get.fpmode.i32 <int_get_fpmode>` The natural floating-point mode type is i32. This is
+ implemented by extracting relevant bits out of the MODE
+ register with s_getreg_b32. The first 10 bits are the
+ core floating-point mode. Bits 12:18 are the exception
+ mask. On gfx9+, bit 23 is FP16_OVFL. Bitfields not
+ relevant to floating-point instructions are 0s.
+
:ref:`llvm.get.rounding<int_get_rounding>` AMDGPU supports two separately controllable rounding
modes depending on the floating-point type. One
controls float, and the other controls both double and
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7a2878b8823fb0f..dc5c84de420d76c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -25660,6 +25660,7 @@ The '``llvm.reset.fpenv``' intrinsic sets the current floating-point environment
to default state. It is similar to the call 'fesetenv(FE_DFL_ENV)', except it
does not return any value.
+.. _int_get_fpmode:
'``llvm.get.fpmode``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b5af88af1d558f5..1020878955f7fce 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -757,6 +757,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+ // TODO: Could move this to custom lowering, could benefit from combines on
+ // extract of relevant bits.
+ setOperationAction(ISD::GET_FPMODE, MVT::i32, Legal);
+
setTargetDAGCombine({ISD::ADD,
ISD::UADDO_CARRY,
ISD::SUB,
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 175045a8a893e92..229aa9c75d16d2d 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -870,6 +870,8 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
// This is hasSideEffects to allow its use in readcyclecounter selection.
// FIXME: Need to truncate immediate to 16-bits.
+// FIXME: Missing mode register use. Should have separate pseudos for
+// known may read MODE and only read MODE.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
@@ -1424,6 +1426,66 @@ def : GCNPat <
(S_WAIT_EVENT (i16 0))
>;
+// The first 10 bits of the mode register are the core FP mode on all
+// subtargets.
+//
+// The high bits include additional fields, intermixed with some
+// non-floating point environment information. We extract the full
+// register and clear non-relevant bits.
+//
+// EXCP_EN covers floating point exceptions, but also some other
+// non-FP exceptions.
+//
+// Bits 12-18 cover the relevant exception mask on all subtargets.
+//
+// FIXME: Bit 18 is int_div0, should this be in the FP environment? I
+// think the only source is v_rcp_iflag_i32.
+//
+// On GFX9+:
+// Bit 23 is the additional FP16_OVFL mode.
+//
+// Bits 19, 20, and 21 cover non-FP exceptions and differ between
+// gfx9/10/11, so we ignore them here.
+
+// TODO: Would it be cheaper to emit multiple s_getreg_b32 calls for
+// the ranges and combine the results?
+
+defvar fp_round_mask = !add(!shl(1, 4), -1);
+defvar fp_denorm_mask = !shl(!add(!shl(1, 4), -1), 4);
+defvar dx10_clamp_mask = !shl(1, 8);
+defvar ieee_mode_mask = !shl(1, 9);
+
+// Covers fp_round, fp_denorm, dx10_clamp, and IEEE bit.
+defvar fpmode_mask =
+ !or(fp_round_mask, fp_denorm_mask, dx10_clamp_mask, ieee_mode_mask);
+
+defvar fp_excp_en_mask = !shl(!add(!shl(1, 7), -1), 12);
+defvar fp16_ovfl = !shl(1, 23);
+defvar fpmode_mask_gfx6plus = !or(fpmode_mask, fp_excp_en_mask);
+defvar fpmode_mask_gfx9plus = !or(fpmode_mask_gfx6plus, fp16_ovfl);
+
+class GetFPModePat<int fpmode_mask> : GCNPat<
+ (i32 get_fpmode),
+ (S_AND_B32 (i32 fpmode_mask),
+ (S_GETREG_B32 getHwRegImm<
+ HWREG.MODE, 0,
+ !add(!logtwo(fpmode_mask), 1)>.ret))
+>;
+
+// TODO: Might be worth moving to custom lowering so the and is
+// exposed to demanded bits optimizations. Most users probably only
+// care about the rounding or denorm mode bits. We also can reduce the
+// demanded read from the getreg immediate.
+let SubtargetPredicate = isGFX9Plus in {
+// Last bit = FP16_OVFL
+def : GetFPModePat<fpmode_mask_gfx9plus>;
+}
+
+// Last bit = EXCP_EN.int_div0
+let SubtargetPredicate = isNotGFX9Plus in {
+def : GetFPModePat<fpmode_mask_gfx6plus>;
+}
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
new file mode 100644
index 000000000000000..a665e58adb8a1b1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
@@ -0,0 +1,697 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
+; RUN: llc -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
+; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
+
+declare i32 @llvm.get.fpmode.i32()
+
+define i32 @func_fpmode_i32() {
+; GFX678-LABEL: func_fpmode_i32:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ ret i32 %fpmode
+}
+
+define i32 @strictfp_func_fpmode_i32() strictfp {
+; GFX678-LABEL: strictfp_func_fpmode_i32:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: strictfp_func_fpmode_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: strictfp_func_fpmode_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: strictfp_func_fpmode_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ ret i32 %fpmode
+}
+
+define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) {
+; GFX6-LABEL: kernel_fpmode_i32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX6-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: kernel_fpmode_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX7-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: kernel_fpmode_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19)
+; GFX8-NEXT: s_and_b32 s2, 0x7f3ff, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: flat_store_dword v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX9-LABEL: kernel_fpmode_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s2, 0x87f3ff, s2
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: kernel_fpmode_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_and_b32 s2, 0x87f3ff, s2
+; GFX10-NEXT: v_mov_b32_e32 v1, s2
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: kernel_fpmode_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s2, 0x87f3ff, s2
+; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ store i32 %fpmode, ptr addrspace(1) %ptr
+ ret void
+}
+
+; TODO: We should be able to reduce the demanded bits and ask for less
+; from s_getreg_b32
+define i32 @func_fpmode_i32_denormonly() {
+; GFX678-LABEL: func_fpmode_i32_denormonly:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0xf0
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_denormonly:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0xf0
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_denormonly:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0xf0
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_denormonly:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0xf0
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %denorm.only = and i32 %fpmode, 240
+ ret i32 %denorm.only
+}
+
+define i32 @func_fpmode_i32_roundonly() {
+; GFX678-LABEL: func_fpmode_i32_roundonly:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 15
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_roundonly:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 15
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_roundonly:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 15
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_roundonly:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 15
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %round.only = and i32 %fpmode, 15
+ ret i32 %round.only
+}
+
+define i32 @func_fpmode_i32_round_denorm_only() {
+; GFX678-LABEL: func_fpmode_i32_round_denorm_only:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0xff
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_round_denorm_only:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0xff
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_round_denorm_only:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0xff
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_round_denorm_only:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0xff
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %round.denorm.only = and i32 %fpmode, 255
+ ret i32 %round.denorm.only
+}
+
+define i32 @func_fpmode_i32_round_denorm_dx10_ieee() {
+; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0x3ff
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0x3ff
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0x3ff
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0x3ff
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %core.mode = and i32 %fpmode, 1023
+ ret i32 %core.mode
+}
+
+define i32 @func_fpmode_i32_excp_en() {
+; GFX678-LABEL: func_fpmode_i32_excp_en:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0x7f000
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_excp_en:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0x7f000
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_excp_en:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0x7f000
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_excp_en:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0x7f000
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %core.mode = and i32 %fpmode, 520192
+ ret i32 %core.mode
+}
+
+; Mask for all bits used on gfx6+
+define i32 @func_fpmode_i32_environment_gfx6() {
+; GFX678-LABEL: func_fpmode_i32_environment_gfx6:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0x7f3ff
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_environment_gfx6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0x7f3ff
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_environment_gfx6:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0x7f3ff
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_environment_gfx6:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0x7f3ff
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %core.mode = and i32 %fpmode, 521215
+ ret i32 %core.mode
+}
+
+; Mask for all bits used on gfx9+
+define i32 @func_fpmode_i32_environment_gfx9() {
+; GFX678-LABEL: func_fpmode_i32_environment_gfx9:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0x87f3ff
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_environment_gfx9:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0x87f3ff
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_environment_gfx9:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0x87f3ff
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_environment_gfx9:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0x87f3ff
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %core.mode = and i32 %fpmode, 8909823
+ ret i32 %core.mode
+}
+
+define i32 @func_fpmode_i32_denormf32only() {
+; GFX678-LABEL: func_fpmode_i32_denormf32only:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 48
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_denormf32only:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 48
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_denormf32only:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 48
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_denormf32only:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 48
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %denorm.only = and i32 %fpmode, 48
+ ret i32 %denorm.only
+}
+
+define i32 @func_fpmode_i32_denormf32only_0() {
+; GFX678-LABEL: func_fpmode_i32_denormf32only_0:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 32
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_denormf32only_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 32
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_denormf32only_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 32
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_denormf32only_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 32
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %denorm.only = and i32 %fpmode, 32
+ ret i32 %denorm.only
+}
+
+define i32 @func_fpmode_i32_denormf32only_1() {
+; GFX678-LABEL: func_fpmode_i32_denormf32only_1:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 64
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_denormf32only_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 64
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_denormf32only_1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 64
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_denormf32only_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 64
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %denorm.only = and i32 %fpmode, 64
+ ret i32 %denorm.only
+}
+
+define i32 @func_fpmode_i32_denormf64f16only() {
+; GFX678-LABEL: func_fpmode_i32_denormf64f16only:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0xc0
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_denormf64f16only:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0xc0
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_denormf64f16only:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0xc0
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_denormf64f16only:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0xc0
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %denorm.only = and i32 %fpmode, 192
+ ret i32 %denorm.only
+}
+
+define i32 @func_fpmode_i32_dx10_clamp_only() {
+; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0x100
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0x100
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0x100
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_dx10_clamp_only:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0x100
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %dx10.only = and i32 %fpmode, 256
+ ret i32 %dx10.only
+}
+
+define i32 @func_fpmode_i32_ieee_only() {
+; GFX678-LABEL: func_fpmode_i32_ieee_only:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
+; GFX678-NEXT: s_and_b32 s4, 0x7f3ff, s4
+; GFX678-NEXT: s_and_b32 s4, s4, 0x200
+; GFX678-NEXT: v_mov_b32_e32 v0, s4
+; GFX678-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: func_fpmode_i32_ieee_only:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX9-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX9-NEXT: s_and_b32 s4, s4, 0x200
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: func_fpmode_i32_ieee_only:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
+; GFX10-NEXT: s_and_b32 s4, 0x87f3ff, s4
+; GFX10-NEXT: s_and_b32 s4, s4, 0x200
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: func_fpmode_i32_ieee_only:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
+; GFX11-NEXT: s_and_b32 s0, 0x87f3ff, s0
+; GFX11-NEXT: s_and_b32 s0, s0, 0x200
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fpmode = call i32 @llvm.get.fpmode.i32()
+ %ieee.only = and i32 %fpmode, 512
+ ret i32 %ieee.only
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GFX1011: {{.*}}
More information about the llvm-commits
mailing list