[llvm] [AMDGPU][CodeGen] Support AND/OR/XOR and LDEXP True16 format (PR #102620)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 9 07:06:41 PDT 2024
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/102620
None
>From c8979269367a09977ed3016bbbec2cba03771679 Mon Sep 17 00:00:00 2001
From: guochen2 <guochen2 at amd.com>
Date: Fri, 9 Aug 2024 10:04:57 -0400
Subject: [PATCH] tmp
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 37 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 20 +
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 37 +-
llvm/test/CodeGen/AMDGPU/bf16.ll | 104 ++-
llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll | 816 +++++++++++++-----
5 files changed, 737 insertions(+), 277 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f78699f88de56c..40fbf428f1d61d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -161,18 +161,31 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
// TODO: Skip masking high bits if def is known boolean.
- bool IsSGPR = TRI.isSGPRClass(SrcRC);
- unsigned AndOpc =
- IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
- auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
- .addImm(1)
- .addReg(SrcReg);
- if (IsSGPR)
- And.setOperandDead(3); // Dead scc
-
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
- .addImm(0)
- .addReg(MaskedReg);
+ if (AMDGPU::getRegBitWidth(SrcRC->getID()) == 16) {
+ assert(Subtarget->useRealTrue16Insts());
+ const int64_t NoMods = 0;
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
+ .addImm(NoMods).addImm(1)
+ .addImm(NoMods).addReg(SrcReg)
+ .addImm(NoMods);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
+ .addImm(NoMods).addImm(0)
+ .addImm(NoMods).addReg(MaskedReg)
+ .addImm(NoMods);
+ } else {
+ bool IsSGPR = TRI.isSGPRClass(SrcRC);
+ unsigned AndOpc =
+ IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
+ auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
+ .addImm(1)
+ .addReg(SrcReg);
+ if (IsSGPR)
+ And.setOperandDead(3); // Dead scc
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+ .addImm(0)
+ .addReg(MaskedReg);
+ }
}
if (!MRI->getRegClassOrNull(SrcReg))
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c41850ab55f75c..5a139d1cf8d825 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2030,6 +2030,8 @@ def : GCNPat <
>;
foreach fp16vt = [f16, bf16] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let SubtargetPredicate = p in {
def : GCNPat <
(fabs (fp16vt VGPR_32:$src)),
(V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2044,6 +2046,24 @@ def : GCNPat <
(fneg (fabs (fp16vt VGPR_32:$src))),
(V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
>;
+}
+
+let SubtargetPredicate = UseRealTrue16Insts in { // fabs/fneg on VGPR_16 selected as 16-bit bitwise ops
+def : GCNPat <
+ (fabs (fp16vt VGPR_16:$src)),
+ (V_AND_B16_t16_e64 (i32 0), (i16 0x7fff), (i32 0), VGPR_16:$src) // Clear sign bit
+>;
+
+def : GCNPat <
+ (fneg (fp16vt VGPR_16:$src)),
+ (V_XOR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src) // Flip sign bit
+>;
+
+def : GCNPat <
+ (fneg (fabs (fp16vt VGPR_16:$src))),
+ (V_OR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src) // Set sign bit
+>;
+} // End SubtargetPredicate = UseRealTrue16Insts
} // End foreach fp16vt = ...
def : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index d17b4f24081312..a9ebbb1a1886fa 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -922,18 +922,25 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
let HasSrc1FloatMods = 0;
let Src1ModSDWA = Int16SDWAInputMods;
}
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> { // ldexp profile with src1 overridden to VGPR_16_Lo128
+ let Src1RC32 = RegisterOperand<VGPR_16_Lo128>;
+ let Src1DPP = RegisterOperand<VGPR_16_Lo128>;
+ let Src1ModDPP = IntT16VRegInputMods<0/*IsFake16*/>;
+}
+def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
- let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>;
+ let Src1ModDPP = IntT16VRegInputMods<1/*IsFake16*/>;
}
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
- let SubtargetPredicate = HasTrue16BitInsts in
+ let SubtargetPredicate = UseRealTrue16Insts in
defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
+ let SubtargetPredicate = UseFakeTrue16Insts in
+ defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@@ -968,14 +975,27 @@ class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.
let OtherPredicates = [NotHasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
-let OtherPredicates = [HasTrue16BitInsts] in
-def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat < // Selects `inst` for `op`, forwarding modifiers, clamp and omod; op_sel is fixed to 0
+ (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))), // src1 is matched as an i16 value
+ (inst $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $clamp, /* clamp */
+ $omod, /* omod */
+ 0) /* op_sel */
+>;
+
+let OtherPredicates = [UseRealTrue16Insts] in
+def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+
+let OtherPredicates = [UseFakeTrue16Insts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>;
let SubtargetPredicate = isGFX11Plus in {
let isCommutable = 1 in {
- defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
- defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
- defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
+ defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
+ defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
+ defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus
@@ -1714,6 +1734,7 @@ defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
+defm V_LDEXP_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 970bb08e1838b2..1228231a53bcce 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -17180,11 +17180,17 @@ define bfloat @v_fabs_bf16(bfloat %a) {
; GFX10-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_fabs_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fabs_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fabs_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%op = call bfloat @llvm.fabs.bf16(bfloat %a)
ret bfloat %op
}
@@ -17266,11 +17272,17 @@ define bfloat @v_fneg_bf16(bfloat %a) {
; GFX10-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_fneg_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fneg_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fneg_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%op = fneg bfloat %a
ret bfloat %op
}
@@ -17365,11 +17377,17 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
; GFX10-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_fneg_fabs_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_or_b32_e32 v0, 0x8000, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fneg_fabs_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fneg_fabs_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
%op = fneg bfloat %fabs
ret bfloat %op
@@ -34518,15 +34536,25 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_select_fneg_lhs_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT: v_xor_b16 v1.l, 0x8000, v1.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%neg.a = fneg bfloat %a
%op = select i1 %cond, bfloat %neg.a, bfloat %b
ret bfloat %op
@@ -34582,15 +34610,25 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: v_select_fneg_rhs_bf16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX11TRUE16: ; %bb.0:
+; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT: v_xor_b16 v2.l, 0x8000, v2.l
+; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX11FAKE16: ; %bb.0:
+; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v2
+; GFX11FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
%neg.b = fneg bfloat %b
%op = select i1 %cond, bfloat %a, bfloat %neg.b
ret bfloat %op
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index b2b5153bb6c2a2..a8b991d04149fd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -2,12 +2,14 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL-FAKE16 %s
define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) {
; GFX6-LABEL: test_ldexp_f32_i32:
@@ -211,13 +213,22 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_f16_i8:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i8:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i8:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_f16_i8:
; GFX6-GISEL: ; %bb.0:
@@ -248,15 +259,27 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_f16_i8:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 8
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i8:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i8:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 8
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.ldexp.f16.i8(half %a, i8 %b)
ret half %result
}
@@ -283,11 +306,19 @@ define half @test_ldexp_f16_i16(half %a, i16 %b) {
; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: test_ldexp_f16_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i16:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i16:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_f16_i16:
; GFX6-GISEL: ; %bb.0:
@@ -297,6 +328,20 @@ define half @test_ldexp_f16_i16(half %a, i16 %b) {
; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i16:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i16:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.ldexp.f16.i16(half %a, i16 %b)
ret half %result
}
@@ -328,14 +373,23 @@ define half @test_ldexp_f16_i32(half %a, i32 %b) {
; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i32:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i32:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_f16_i32:
; GFX6-GISEL: ; %bb.0:
@@ -363,14 +417,25 @@ define half @test_ldexp_f16_i32(half %a, i32 %b) {
; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x7fff
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i32:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i32:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call half @llvm.ldexp.f16.i32(half %a, i32 %b)
ret half %result
}
@@ -411,19 +476,39 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX6-GISEL: ; %bb.0:
@@ -460,21 +545,45 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 0x7fff
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
-; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v4, v2
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v1, v3
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v5.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v4.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v4, v2
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b)
ret <2 x half> %result
}
@@ -509,16 +618,33 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v3, v2
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v1, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i16:
; GFX6-GISEL: ; %bb.0:
@@ -549,18 +675,39 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v2, v3
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v4.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v1.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b)
ret <2 x half> %result
}
@@ -608,21 +755,46 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v5, v3
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v2, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v5.l
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.h
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v3, v5, v3
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v2
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX6-GISEL: ; %bb.0:
@@ -666,23 +838,51 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
-; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v6, v3
-; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v6, 0xffff8000, v2, v5
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v7.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v6.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v3, 16, v2
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v6, v3
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b)
ret <3 x half> %result
}
@@ -723,17 +923,35 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v4
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v5, v4
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v4.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v5.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v2.h, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v5, v4
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i16:
; GFX6-GISEL: ; %bb.0:
@@ -770,19 +988,43 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v5
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v5.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v4.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v3, 16, v2
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v4, v5
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> %a, <3 x i16> %b)
ret <3 x half> %result
}
@@ -839,26 +1081,57 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-SDAG-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v0
-; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v5, v6, v5
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v7, v3
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v6, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v0
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v5, v2, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.h, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v8.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v1.h, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.h, v5.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.h
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v3.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v3, v1
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v5, v6, v5
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v3, v7, v3
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v5
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX6-GISEL: ; %bb.0:
@@ -911,30 +1184,67 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v3
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v0
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
-; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
-; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
-; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v4
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v7, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v8, v5
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v7, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v7
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v7
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v7
+; GFX11-GISEL-TRUE16-NEXT: v_med3_i32 v5, 0xffff8000, v5, v7
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.h, v4.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v8.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v5.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v6.l, v2.h
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v2.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v4, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v3, 16, v2
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v1, v1, 16, v4
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
+; GFX11-GISEL-FAKE16-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v7, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v3, v8, v5
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b)
ret <4 x half> %result
}
@@ -983,22 +1293,46 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v4
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v3
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v1
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v6, v5
-; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v7, v4
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-SDAG-TRUE16: ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.h, v5.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v5.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v7.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v1.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v2.l, v5.l, v4.h
+; GFX11-SDAG-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v1.h, v4.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v3, v2
+; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v1, v4, v1
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-SDAG-FAKE16: ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v2, v6, v5
+; GFX11-SDAG-FAKE16-NEXT: v_ldexp_f16_e32 v3, v7, v4
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i16:
; GFX6-GISEL: ; %bb.0:
@@ -1043,25 +1377,59 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v6
-; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v5, v7
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-GISEL-TRUE16: ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v7, 16, v2
+; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v8, 16, v3
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, v5.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v6.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.h, v7.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v8.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.l, v4.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.l, v0.l, v1.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v0.h, v0.h, v2.h
+; GFX11-GISEL-TRUE16-NEXT: v_ldexp_f16_e32 v1.h, v2.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX11-GISEL-TRUE16-NEXT: v_lshl_or_b32 v1, v1, 16, v3
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-GISEL-FAKE16: ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v2, v4, v6
+; GFX11-GISEL-FAKE16-NEXT: v_ldexp_f16_e32 v3, v5, v7
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
%result = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> %a, <4 x i16> %b)
ret <4 x half> %result
}
More information about the llvm-commits mailing list