[llvm] 3d409e5 - AMDGPU: Handle constrained fpext

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 25 16:26:30 PDT 2023


Author: Matt Arsenault
Date: 2023-06-25T19:26:25-04:00
New Revision: 3d409e55a1f26734d1117b882e5aa8f97f7c1138

URL: https://github.com/llvm/llvm-project/commit/3d409e55a1f26734d1117b882e5aa8f97f7c1138
DIFF: https://github.com/llvm/llvm-project/commit/3d409e55a1f26734d1117b882e5aa8f97f7c1138.diff

LOG: AMDGPU: Handle constrained fpext

Added: 
    llvm/test/CodeGen/AMDGPU/strict_fpext.ll

Modified: 
    llvm/lib/Target/AMDGPU/VOP1Instructions.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 06ded85946f50..200de4e39c020 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -266,7 +266,7 @@ defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
 }
 
 defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64,  fpround>;
-defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32,  fpextend>;
+defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32,  any_fpextend>;
 // OMod clears exceptions when set in this instruction
 defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD,  fp_to_uint>;
 
@@ -296,9 +296,9 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
 } // End FPDPRounding = 1, isReMaterializable = 0
 
 let OtherPredicates = [NotHasTrue16BitInsts] in
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
 let OtherPredicates = [HasTrue16BitInsts] in
-defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, fpextend>;
+defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
 
 let ReadsModeReg = 0, mayRaiseFPException = 0 in {
 defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
new file mode 100644
index 0000000000000..06f8e6a69b93b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; FIXME: Missing operand promotion for f16; the tahiti run line below is disabled (XUN) until this is fixed.
+; XUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
+
+define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
+  ret float %result
+}
+
+define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v0
+; GFX89-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX89-NEXT:    v_mov_b32_e32 v0, v2
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v0
+; GFX10-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX10-NEXT:    v_mov_b32_e32 v0, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
+  ret <2 x float>   %result
+}
+
+define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v4, v0
+; GFX89-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
+; GFX89-NEXT:    v_mov_b32_e32 v0, v4
+; GFX89-NEXT:    v_mov_b32_e32 v1, v3
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v0
+; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
+; GFX10-NEXT:    v_mov_b32_e32 v0, v4
+; GFX10-NEXT:    v_mov_b32_e32 v1, v3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v2
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v1
+; GFX11-NEXT:    v_mov_b32_e32 v1, v3
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
+  ret <3 x float>   %result
+}
+
+define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
+  ret double %result
+}
+
+define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_mov_b32_e32 v2, v1
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_mov_b32_e32 v2, v1
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict")
+  ret <2 x double>   %result
+}
+
+define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_mov_b32_e32 v4, v2
+; GFX89-NEXT:    v_mov_b32_e32 v2, v1
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_mov_b32_e32 v4, v2
+; GFX1011-NEXT:    v_mov_b32_e32 v2, v1
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict")
+  ret <3 x double>   %result
+}
+
+define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict")
+  ret double %result
+}
+
+define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
+; GFX89-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
+; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX10-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
+; GFX10-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v1
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
+  ret <2 x double>   %result
+}
+
+define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v0
+; GFX89-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v4, v1
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v0
+; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
+; GFX10-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
+; GFX10-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
+; GFX10-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v1
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[4:5], v3
+; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
+  ret <3 x double>   %result
+}
+
+define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
+; GFX89-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX89-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX1011-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
+  %neg.result = fneg float %result
+  ret float %neg.result
+}
+
+define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f32_f16_e64 v0, -v0
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %neg.arg = fneg half %arg
+  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %neg.arg, metadata !"fpexcept.strict")
+  ret float %result
+}
+
+define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
+; GFX89-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %neg.arg = fneg float %arg ; the negated value is the operand of the fpext below
+  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict")
+  ret double %result
+}
+
+define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
+; GFX89-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
+; GFX89:       ; %bb.0:
+; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX89-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX89-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1011-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
+; GFX1011:       ; %bb.0:
+; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
+; GFX1011-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GFX1011-NEXT:    s_setpc_b64 s[30:31]
+  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
+  %neg.result = fneg double %result
+  ret double %neg.result
+}
+
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1
+
+declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1
+
+declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
+declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
+declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1
+
+attributes #0 = { strictfp }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GFX8: {{.*}}
+; GFX9: {{.*}}


        


More information about the llvm-commits mailing list