[llvm] 8bc0f87 - [AMDGPU][True16][CodeGen] D16 LDS load/store pseudo instructions in true16 (#131427)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 07:28:51 PDT 2025
Author: Brox Chen
Date: 2025-03-17T10:28:45-04:00
New Revision: 8bc0f879a05228c58235ded510360da2220f0afd
URL: https://github.com/llvm/llvm-project/commit/8bc0f879a05228c58235ded510360da2220f0afd
DIFF: https://github.com/llvm/llvm-project/commit/8bc0f879a05228c58235ded510360da2220f0afd.diff
LOG: [AMDGPU][True16][CodeGen] D16 LDS load/store pseudo instructions in true16 (#131427)
Implement new pseudos with the suffix _t16 which have VGPR_16 as the
store src or load dst. This affects 8-bit and 16-bit LDS loads and stores.
Lower the pseudos to the existing real Hi/Lo instructions in the MC inst
layer, with VGPR_32 src or dst.
---------
Co-authored-by: Abhinav <abhinav.garg at amd.com>
Added:
Modified:
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 937f5d55999cb..bc1db52eeeb2f 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -127,6 +127,15 @@ multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
}
}
+multiclass DS_1A1D_NORET_t16<string opName, RegisterClass rc = VGPR_32>
+: DS_1A1D_NORET_mc<opName, rc> {
+ let has_m0_read = 0 in {
+ let True16Predicate = UseRealTrue16Insts in {
+ def "_t16" : DS_1A1D_NORET<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_D16_HI", NAME>;
+ }
+ }
+}
+
multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
let has_m0_read = 0 in {
def "" : DS_1A1D_NORET<opName, rc>;
@@ -297,6 +306,15 @@ multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOu
}
}
+multiclass DS_1A_RET_t16<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset>
+: DS_1A_RET_mc<opName, rc, HasTiedOutput, ofs> {
+ let has_m0_read = 0 in {
+ let True16Predicate = UseRealTrue16Insts in {
+ def "_t16" : DS_1A_RET<opName#"_t16", VGPR_16, HasTiedOutput, ofs>, True16D16Table<NAME#"_D16_HI", NAME#"_D16">;
+ }
+ }
+}
+
multiclass DS_1A_RET_NoM0<string opName, RegisterClass rc = VGPR_32> {
let has_m0_read = 0 in {
def "" : DS_1A_RET<opName, rc>;
@@ -460,8 +478,6 @@ defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
let mayLoad = 0 in {
-defm DS_WRITE_B8 : DS_1A1D_NORET_mc<"ds_write_b8">;
-defm DS_WRITE_B16 : DS_1A1D_NORET_mc<"ds_write_b16">;
defm DS_WRITE_B32 : DS_1A1D_NORET_mc<"ds_write_b32">;
defm DS_WRITE2_B32 : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
defm DS_WRITE2ST64_B32: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;
@@ -476,6 +492,9 @@ def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
} // End has_m0_read = 0
+defm DS_WRITE_B8 : DS_1A1D_NORET_t16<"ds_write_b8">;
+defm DS_WRITE_B16 : DS_1A1D_NORET_t16<"ds_write_b16">;
+
let SubtargetPredicate = HasDSAddTid in {
def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
}
@@ -628,10 +647,7 @@ def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
}
let mayStore = 0 in {
-defm DS_READ_I8 : DS_1A_RET_mc<"ds_read_i8">;
-defm DS_READ_U8 : DS_1A_RET_mc<"ds_read_u8">;
defm DS_READ_I16 : DS_1A_RET_mc<"ds_read_i16">;
-defm DS_READ_U16 : DS_1A_RET_mc<"ds_read_u16">;
defm DS_READ_B32 : DS_1A_RET_mc<"ds_read_b32">;
defm DS_READ_B64 : DS_1A_RET_mc<"ds_read_b64", VReg_64>;
@@ -652,6 +668,10 @@ def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
}
} // End has_m0_read = 0
+defm DS_READ_I8 : DS_1A_RET_t16<"ds_read_i8">;
+defm DS_READ_U8 : DS_1A_RET_t16<"ds_read_u8">;
+defm DS_READ_U16 : DS_1A_RET_t16<"ds_read_u16">;
+
let SubtargetPredicate = HasDSAddTid in {
def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
}
@@ -795,34 +815,51 @@ multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
}
}
+multiclass DSReadPat_t16<DS_Pseudo inst, ValueType vt, string frag> {
+
+ let OtherPredicates = [LDSRequiresM0Init] in {
+ def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ }
+
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+ let True16Predicate = p in {
+ def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
+ let True16Predicate = UseRealTrue16Insts in {
+ def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
+ }
+ }
+}
+
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
(inst $ptr, Offset:$offset, (i1 0), $in)
>;
defm : DSReadPat_mc <DS_READ_I8, i32, "sextloadi8_local">;
-defm : DSReadPat_mc <DS_READ_I8, i16, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "zextloadi8_local">;
-defm : DSReadPat_mc <DS_READ_U8, i16, "extloadi8_local">;
-defm : DSReadPat_mc <DS_READ_U8, i16, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
-defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
+defm : DSReadPat_t16 <DS_READ_I8, i16, "sextloadi8_local">;
+defm : DSReadPat_t16 <DS_READ_U8, i16, "extloadi8_local">;
+defm : DSReadPat_t16 <DS_READ_U8, i16, "zextloadi8_local">;
+defm : DSReadPat_t16 <DS_READ_U16, i16, "load_local">;
foreach vt = Reg32Types.types in {
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
}
-defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
+defm : DSReadPat_t16 <DS_READ_U8, i16, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
-defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_zext_8_local">;
+defm : DSReadPat_t16 <DS_READ_U8, i16, "atomic_load_zext_8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_zext_8_local">;
-defm : DSReadPat_mc <DS_READ_I8, i16, "atomic_load_sext_8_local">;
+defm : DSReadPat_t16 <DS_READ_I8, i16, "atomic_load_sext_8_local">;
defm : DSReadPat_mc <DS_READ_I8, i32, "atomic_load_sext_8_local">;
-defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
+defm : DSReadPat_t16 <DS_READ_U16, i16, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_zext_16_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "atomic_load_sext_16_local">;
@@ -861,18 +898,34 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
}
}
+multiclass DSWritePat_t16 <DS_Pseudo inst, ValueType vt, string frag> {
+ let OtherPredicates = [LDSRequiresM0Init] in {
+ def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;
+ }
+
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+ let True16Predicate = p in {
+ def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
+ }
+ let True16Predicate = UseRealTrue16Insts in {
+ def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_t16"), vt, !cast<PatFrag>(frag)>;
+ }
+ }
+}
+
defm : DSWritePat_mc <DS_WRITE_B8, i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
-defm : DSWritePat_mc <DS_WRITE_B8, i16, "truncstorei8_local">;
-defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
+defm : DSWritePat_t16 <DS_WRITE_B8, i16, "truncstorei8_local">;
+defm : DSWritePat_t16 <DS_WRITE_B16, i16, "store_local">;
foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
-defm : DSWritePat_mc <DS_WRITE_B8, i16, "atomic_store_8_local">;
+defm : DSWritePat_t16 <DS_WRITE_B8, i16, "atomic_store_8_local">;
defm : DSWritePat_mc <DS_WRITE_B8, i32, "atomic_store_8_local">;
-defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
+defm : DSWritePat_t16 <DS_WRITE_B16, i16, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
index a3b6c283512f3..7f45b038b6d0d 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -1,208 +1,493 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
-; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u8 v0, v0{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i8:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u8 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u8 v0, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_i8:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u8_d16 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_i8:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
ret i8 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i8_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u8 v0, v0 offset:16
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i8_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u8 v0, v0 offset:16
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_i8_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u8_d16 v0, v0 offset:16
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_i8_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0 offset:16
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
%load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1
ret i8 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u16 v0, v0{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u16 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u16 v0, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
ret i16 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i16_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u16 v0, v0 offset:32
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i16_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u16 v0, v0 offset:32
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_i16_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0 offset:32
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_i16_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
%load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2
ret i16 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b32 v0, v0{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b32 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b32 v0, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b32 v0, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4
ret i32 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i32_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b32 v0, v0 offset:64
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i32_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b32 v0, v0 offset:64
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_i32_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
%load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4
ret i32 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i64:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b64 v[0:1], v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b64 v[0:1], v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b64 v[0:1], v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8
ret i64 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_i64_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b64 v[0:1], v0 offset:128
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_i64_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b64 v[0:1], v0 offset:128
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_i64_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16
%load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8
ret i64 %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_f32_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b32 v0, v0 offset:64
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_f32_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b32 v0, v0 offset:64
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_f32_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16
%load = load atomic float, ptr addrspace(3) %gep monotonic, align 4
ret float %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_f64_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b64 v[0:1], v0 offset:128
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_f64_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b64 v[0:1], v0 offset:128
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_f64_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16
%load = load atomic double, ptr addrspace(3) %gep monotonic, align 8
ret double %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_p0i8_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b64 v[0:1], v0 offset:128
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_p0i8_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b64 v[0:1], v0 offset:128
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_p0i8_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16
%load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8
ret ptr %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_p3i8_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_b32 v0, v0 offset:64
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_p3i8_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_b32 v0, v0 offset:64
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_load_monotonic_p3i8_offset:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16
%load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4
ret ptr addrspace(3) %load
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_f16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u16 v0, v0{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u16 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u16 v0, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
%ret = bitcast half %load to i16
ret i16 %ret
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_f16_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_f16_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u16 v0, v0 offset:32
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_f16_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u16 v0, v0 offset:32
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_f16_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0 offset:32
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_f16_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
%load = load atomic half, ptr addrspace(3) %gep monotonic, align 2
%ret = bitcast half %load to i16
ret i16 %ret
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_bf16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u16 v0, v0{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_bf16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u16 v0, v0
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u16 v0, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_bf16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_bf16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
%ret = bitcast bfloat %load to i16
ret i16 %ret
}
-; GCN-LABEL: {{^}}atomic_load_monotonic_bf16_offset:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
+; CI-LABEL: atomic_load_monotonic_bf16_offset:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_read_u16 v0, v0 offset:32
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_monotonic_bf16_offset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_read_u16 v0, v0 offset:32
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_load_monotonic_bf16_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0 offset:32
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_load_monotonic_bf16_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
%load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2
%ret = bitcast bfloat %load to i16
ret i16 %ret
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
index cd1e1fb1add47..9236b4018317a 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -1,156 +1,470 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
-; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b8 v0, v1{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
+; CI-LABEL: atomic_store_monotonic_i8:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b8 v0, v1
+; CI-NEXT: ds_write_b8 v0, v2
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_i8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b8 v0, v1
+; GFX9-NEXT: ds_write_b8 v0, v2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_i8:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b8 v0, v1
+; GFX11-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_i8:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v2
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %val1 = add i8 %val, 2
store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1
+ store atomic i8 %val1, ptr addrspace(3) %ptr monotonic, align 1
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
- %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
- store atomic i8 %val, ptr addrspace(3) %gep monotonic, align 1
+; CI-LABEL: atomic_store_monotonic_offset_i8:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b8 v0, v1 offset:8
+; CI-NEXT: ds_write_b8 v0, v2 offset:16
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_offset_i8:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b8 v0, v1 offset:8
+; GFX9-NEXT: ds_write_b8 v0, v2 offset:16
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b8 v0, v1 offset:8
+; GFX11-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1 offset:16
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 offset:8
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v2 offset:16
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %val1 = add i8 %val, 2
+ %gep_1 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 8
+ %gep_2 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
+ store atomic i8 %val, ptr addrspace(3) %gep_1 monotonic, align 1
+ store atomic i8 %val1, ptr addrspace(3) %gep_2 monotonic, align 1
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b16 v0, v1{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
+; CI-LABEL: atomic_store_monotonic_i16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b16 v0, v1
+; CI-NEXT: ds_write_b16 v0, v2
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b16 v0, v1
+; GFX9-NEXT: ds_write_b16 v0, v2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %val1 = add i16 %val, 2
store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2
+ store atomic i16 %val1, ptr addrspace(3) %ptr monotonic, align 2
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val) {
+; CI-LABEL: atomic_store_monotonic_offset_i16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b16 v0, v1 offset:32
+; CI-NEXT: ds_write_b16 v0, v2 offset:32
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_offset_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:32
+; GFX9-NEXT: ds_write_b16 v0, v2 offset:32
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %val1 = add i16 %val, 2
%gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2
+ store atomic i16 %val1, ptr addrspace(3) %gep monotonic, align 2
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b32 v0, v1{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) {
+; CI-LABEL: atomic_store_monotonic_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v1
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_write_b32 v0, v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_store_monotonic_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_store_b32 v0, v1
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val) {
+; CI-LABEL: atomic_store_monotonic_offset_i32:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b32 v0, v1 offset:64
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_offset_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_write_b32 v0, v1 offset:64
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_store_monotonic_offset_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_store_b32 v0, v1 offset:64
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) {
+; CI-LABEL: atomic_store_monotonic_i64:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b64 v0, v[1:2]
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_write_b64 v0, v[1:2]
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_store_monotonic_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_store_b64 v0, v[1:2]
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val) {
+; CI-LABEL: atomic_store_monotonic_offset_i64:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: ds_write_b64 v0, v[1:2] offset:128
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_offset_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ds_write_b64 v0, v[1:2] offset:128
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: atomic_store_monotonic_offset_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: ds_store_b64 v0, v[1:2] offset:128
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16
store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_f16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b16 v0, v1{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
+; CI-LABEL: atomic_store_monotonic_f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b16 v0, v1
+; CI-NEXT: ds_write_b16 v0, v2
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b16 v0, v1
+; GFX9-NEXT: ds_write_b16 v0, v2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %arg.val1 = add i16 %arg.val, 2
%val = bitcast i16 %arg.val to half
+ %val1 = bitcast i16 %arg.val1 to half
store atomic half %val, ptr addrspace(3) %ptr monotonic, align 2
+ store atomic half %val1, ptr addrspace(3) %ptr monotonic, align 2
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_offset_f16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
+; CI-LABEL: atomic_store_monotonic_offset_f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b16 v0, v1 offset:32
+; CI-NEXT: ds_write_b16 v0, v2 offset:32
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_offset_f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:32
+; GFX9-NEXT: ds_write_b16 v0, v2 offset:32
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %arg.val1 = add i16 %arg.val, 2
+ %val1 = bitcast i16 %arg.val1 to half
%val = bitcast i16 %arg.val to half
%gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
store atomic half %val, ptr addrspace(3) %gep monotonic, align 2
+ store atomic half %val1, ptr addrspace(3) %gep monotonic, align 2
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_bf16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b16 v0, v1{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
+; CI-LABEL: atomic_store_monotonic_bf16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b16 v0, v1
+; CI-NEXT: ds_write_b16 v0, v2
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b16 v0, v1
+; GFX9-NEXT: ds_write_b16 v0, v2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_bf16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_bf16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %arg.val1 = add i16 %arg.val, 2
+ %val1 = bitcast i16 %arg.val1 to bfloat
%val = bitcast i16 %arg.val to bfloat
store atomic bfloat %val, ptr addrspace(3) %ptr monotonic, align 2
+ store atomic bfloat %val1, ptr addrspace(3) %ptr monotonic, align 2
ret void
}
-; GCN-LABEL: {{^}}atomic_store_monotonic_offset_bf16:
-; GCN: s_waitcnt
-; GFX9-NOT: s_mov_b32 m0
-; CI-NEXT: s_mov_b32 m0
-; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
+; CI-LABEL: atomic_store_monotonic_offset_bf16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: s_mov_b32 m0, -1
+; CI-NEXT: v_add_i32_e32 v2, vcc, 2, v1
+; CI-NEXT: ds_write_b16 v0, v1 offset:32
+; CI-NEXT: ds_write_b16 v0, v2 offset:32
+; CI-NEXT: s_waitcnt lgkmcnt(0)
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v2, 2, v1
+; GFX9-NEXT: ds_write_b16 v0, v1 offset:32
+; GFX9-NEXT: ds_write_b16 v0, v2 offset:32
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX11-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX11-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %arg.val1 = add i16 %arg.val, 2
+ %val1 = bitcast i16 %arg.val1 to bfloat
%val = bitcast i16 %arg.val to bfloat
%gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
store atomic bfloat %val, ptr addrspace(3) %gep monotonic, align 2
+ store atomic bfloat %val1, ptr addrspace(3) %gep monotonic, align 2
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 806fe899a9149..c739ba2183ef9 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -224,15 +224,25 @@ define <2 x half> @chain_hi_to_lo_group() {
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: chain_hi_to_lo_group:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mov_b32_e32 v1, 0
-; GFX11-NEXT: ds_load_u16 v0, v1 offset:2
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: ds_load_u16_d16_hi v0, v1
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: chain_hi_to_lo_group:
+; GFX11-TRUE16: ; %bb.0: ; %bb
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v1 offset:2
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16_hi v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: chain_hi_to_lo_group:
+; GFX11-FAKE16: ; %bb.0: ; %bb
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v1 offset:2
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16_d16_hi v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
bb:
%gep_lo = getelementptr inbounds half, ptr addrspace(3) null, i64 1
%load_lo = load half, ptr addrspace(3) %gep_lo
@@ -263,14 +273,23 @@ define <2 x half> @chain_hi_to_lo_group_different_bases(ptr addrspace(3) %base_l
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: chain_hi_to_lo_group_different_bases:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: ds_load_u16 v0, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: ds_load_u16_d16_hi v0, v1
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: chain_hi_to_lo_group_different_bases:
+; GFX11-TRUE16: ; %bb.0: ; %bb
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16_hi v0, v1
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: chain_hi_to_lo_group_different_bases:
+; GFX11-FAKE16: ; %bb.0: ; %bb
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16_d16_hi v0, v1
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
bb:
%load_lo = load half, ptr addrspace(3) %base_lo
%load_hi = load half, ptr addrspace(3) %base_hi
@@ -780,16 +799,27 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: chain_hi_to_lo_group_other_dep_multi_chain:
-; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: ds_load_u16 v1, v0 offset:2
-; GFX11-NEXT: ds_load_u16_d16_hi v0, v0
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: chain_hi_to_lo_group_other_dep_multi_chain:
+; GFX11-TRUE16: ; %bb.0: ; %bb
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v1, v0 offset:2
+; GFX11-TRUE16-NEXT: ds_load_u16_d16_hi v0, v0
+; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: chain_hi_to_lo_group_other_dep_multi_chain:
+; GFX11-FAKE16: ; %bb.0: ; %bb
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: ds_load_u16 v1, v0 offset:2
+; GFX11-FAKE16-NEXT: ds_load_u16_d16_hi v0, v0
+; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
bb:
%gep_lo = getelementptr inbounds i16, ptr addrspace(3) %ptr, i64 1
%load_lo = load volatile i16, ptr addrspace(3) %gep_lo
@@ -1047,12 +1077,12 @@ define <2 x i16> @chain_hi_to_lo_group_may_alias_store(ptr addrspace(3) %ptr, pt
; GFX11-TRUE16-LABEL: chain_hi_to_lo_group_may_alias_store:
; GFX11-TRUE16: ; %bb.0: ; %bb
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX11-TRUE16-NEXT: ds_load_u16 v3, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0x7b
+; GFX11-TRUE16-NEXT: ds_load_u16_d16_hi v2, v0
; GFX11-TRUE16-NEXT: ds_store_b16 v1, v2
-; GFX11-TRUE16-NEXT: ds_load_u16 v0, v0 offset:2
+; GFX11-TRUE16-NEXT: ds_load_u16_d16 v2, v0 offset:2
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: chain_hi_to_lo_group_may_alias_store:
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 8f702da64c508..7819da8b97e55 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -2,7 +2,8 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=CI %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-FAKE16 %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
@@ -227,13 +228,22 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: add_x_shl_max_offset:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0
-; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: add_x_shl_max_offset:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 4, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
+; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 offset:65535
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: add_x_shl_max_offset:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 4, v0
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 offset:65535
+; GFX11-FAKE16-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
%shl = shl i32 %x.i, 4
%add = add i32 %shl, 65535
@@ -271,14 +281,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 {
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: ds_store_b8 v0, v1
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
+; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
+; GFX11-FAKE16-NEXT: s_endpgm
%x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
%.neg = mul i32 %x.i, -4
%add = add i32 %.neg, 65535
@@ -316,14 +336,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 {
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT: ds_store_b8 v0, v1
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
+; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
+; GFX11-FAKE16-NEXT: s_endpgm
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
@@ -359,14 +389,24 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
; GFX10-NEXT: ds_write_b8 v0, v1
; GFX10-NEXT: s_endpgm
;
-; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0
-; GFX11-NEXT: ds_store_b8 v0, v1
-; GFX11-NEXT: s_endpgm
+; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-TRUE16-NEXT: v_sub_nc_u32_e32 v1, 0x10000, v0
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
+; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
+; GFX11-TRUE16-NEXT: s_endpgm
+;
+; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_p1:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-FAKE16-NEXT: v_sub_nc_u32_e32 v0, 0x10000, v0
+; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
+; GFX11-FAKE16-NEXT: s_endpgm
%x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
%neg = sub i32 0, %x.i
%shl = shl i32 %neg, 2
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index dcb1d0e8c20a1..027576630c877 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -17,11 +17,15 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-SDAG,GFX1200-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-SDAG,GFX1200-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL,GFX1200-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL,GFX1200-GISEL-FAKE16 %s
; Test for integer mad formation for patterns used in clpeak
@@ -324,71 +328,137 @@ define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_i16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_i16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: clpeak_imad_pat_i16:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: clpeak_imad_pat_i16:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: clpeak_imad_pat_i16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: clpeak_imad_pat_i16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: clpeak_imad_pat_i16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: clpeak_imad_pat_i16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: clpeak_imad_pat_i16:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: clpeak_imad_pat_i16:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: clpeak_imad_pat_i16:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: clpeak_imad_pat_i16:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%conv33 = add i16 %x, 1
%add = mul i16 %conv33, %y
@@ -1461,71 +1531,137 @@ define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_umad_pat_i16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: clpeak_umad_pat_i16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: clpeak_umad_pat_i16:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: clpeak_umad_pat_i16:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: clpeak_umad_pat_i16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: clpeak_umad_pat_i16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: clpeak_umad_pat_i16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: clpeak_umad_pat_i16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: clpeak_umad_pat_i16:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: clpeak_umad_pat_i16:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: clpeak_umad_pat_i16:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: clpeak_umad_pat_i16:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%conv33 = add i16 %x, 1
%add = mul i16 %conv33, %y
@@ -4315,71 +4451,137 @@ define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_i8:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_i8:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: clpeak_imad_pat_i8:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: clpeak_imad_pat_i8:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: clpeak_imad_pat_i8:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: clpeak_imad_pat_i8:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: clpeak_imad_pat_i8:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: clpeak_imad_pat_i8:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: clpeak_imad_pat_i8:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: clpeak_imad_pat_i8:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: clpeak_imad_pat_i8:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: clpeak_imad_pat_i8:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 8
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%conv33 = add i8 %x, 1
%add = mul i8 %conv33, %y
@@ -4584,113 +4786,221 @@ define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-SDAG-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1
-; GFX11-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0
-; GFX11-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX11-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3
-; GFX11-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i8:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
-; GFX11-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
-; GFX11-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
-; GFX11-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
-; GFX11-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-SDAG-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1
-; GFX1200-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0
-; GFX1200-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX1200-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1200-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3
-; GFX1200-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1
-; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
-; GFX1200-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1200-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i8:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
-; GFX1200-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
-; GFX1200-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v6, v4, v2
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v7, v5, v3
-; GFX1200-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
-; GFX1200-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v6, v0
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v7, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-SDAG-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v1.l, v0.h, v3.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v1.h, v0.l, v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.h, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT: v_mul_lo_u16 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: v_mul_lo_u16 v1.h, v1.h, v2.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v1.l, v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.h, v0.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v0.h
+; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-SDAG-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v4, v1, v3, v1
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v5, v0, v2, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mul_lo_u16 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT: v_mul_lo_u16 v3, v4, v3
+; GFX11-SDAG-FAKE16-NEXT: v_mul_lo_u16 v2, v5, v2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v3, v1, v3
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v2, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v1, v3, v1
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v2, v0
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b16 v2, 8, v1
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v1.l, v0.l, v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v1.h, v0.h, v3.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v2.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v3.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v2.h, v1.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v3.h, v1.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v1.l, v1.l, v2.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v1.h, v1.h, v3.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v2.h, v0.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.h, v3.h, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v1, v1, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v3, v5, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v7, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-SDAG-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v1.l, v0.h, v3.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v1.h, v0.l, v2.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.h, v3.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v2.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-SDAG-TRUE16-NEXT: v_mul_lo_u16 v1.l, v1.l, v3.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mul_lo_u16 v1.h, v1.h, v2.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v1.l, v0.h, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.h, v0.l, v1.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-TRUE16-NEXT: v_lshlrev_b16 v1.l, 8, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b16 v0.l, 0xff, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_or_b16 v0.l, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b16 v1.l, 0xff, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-SDAG-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v4, v1, v3, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v5, v0, v2, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mul_lo_u16 v1, v1, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-SDAG-FAKE16-NEXT: v_mul_lo_u16 v3, v4, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_mul_lo_u16 v2, v5, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v3, v1, v3
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v2, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v1, v3, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v2, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_lshlrev_b16 v2, 8, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xff, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xff, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1200-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, v0, v2
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v1.l, v0.l, v2.l, v0.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v1.h, v0.h, v3.l, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v2.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v3.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v2.h, v1.l, v2.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v3.h, v1.h, v3.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v1.l, v1.l, v2.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v1.h, v1.h, v3.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v2.h, v0.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.h, v3.h, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: clpeak_imad_pat_v2i8:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v4, v0, v2, v0
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v5, v1, v3, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v2, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v1, v1, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v6, v4, v2
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v7, v5, v3
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v2, v4, v2, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v3, v5, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v6, v0
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v7, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v2
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v1, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i8> %x, <i8 1, i8 1>
%add = mul <2 x i8> %y18, %y
@@ -7656,103 +7966,201 @@ define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_i16_x2:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX11-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_i16_x2:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: clpeak_imad_pat_i16_x2:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: clpeak_imad_pat_i16_x2:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: clpeak_imad_pat_i16_x2:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%conv69 = add i16 %x, 1
%add = mul i16 %conv69, %y
@@ -7915,103 +8323,201 @@ define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_umad_pat_i16_x2:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: clpeak_umad_pat_i16_x2:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX11-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: clpeak_umad_pat_i16_x2:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
-; GFX1200-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: clpeak_umad_pat_i16_x2:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
-; GFX1200-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: v_mad_u16 v2, v2, v3, 1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v3, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v1, v2, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: v_mad_u16 v1, v2, v3, 1
-; GFX1200-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v1.l, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v1.l, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v0.l, v0.h
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v1, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v1, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v0
+; GFX1200-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.h, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, 1
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v1.l, v0.h, v1.h
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.h, 1
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v0.h
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: clpeak_umad_pat_i16_x2:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v0, v0, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v2, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v3, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v1, v2, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v1, v2, v3, 1
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%conv69 = add i16 %x, 1
%add = mul i16 %conv69, %y
@@ -8792,51 +9298,95 @@ define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: multi_use_mul_mad_i16_var:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
-; GFX11-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
-; GFX11-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
-; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-SDAG-LABEL: multi_use_mul_mad_i16_var:
-; GFX1200-SDAG: ; %bb.0: ; %entry
-; GFX1200-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX1200-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
-; GFX1200-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
-; GFX1200-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
-; GFX1200-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-GISEL-LABEL: multi_use_mul_mad_i16_var:
-; GFX1200-GISEL: ; %bb.0: ; %entry
-; GFX1200-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX1200-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
-; GFX1200-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
-; GFX1200-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1200-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
-; GFX1200-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
-; GFX1200-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v2.l, v0.l, v1.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v2.h, v0.l, v1.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v2
+; GFX11-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v0.l
+; GFX1200-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.h, v1.l, v2.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.h, v0.h, v1.l, v3.l
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX1200-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v2.l, v0.l, v1.l, v2.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v2.h, v0.l, v1.l, v3.l
+; GFX1200-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1200-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: multi_use_mul_mad_i16_var:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v2, v0, v1, v2
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v3
+; GFX1200-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1200-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v2
+; GFX1200-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, v1
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%mul = mul i16 %x, %y
%add0 = add i16 %mul, %z0
@@ -8956,27 +9506,93 @@ define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: other_use_mul_mad_i16_var:
-; GFX11: ; %bb.0: ; %entry
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX11-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX11-NEXT: ds_store_b16 v3, v4
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX1200-LABEL: other_use_mul_mad_i16_var:
-; GFX1200: ; %bb.0: ; %entry
-; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1200-NEXT: s_wait_expcnt 0x0
-; GFX1200-NEXT: s_wait_samplecnt 0x0
-; GFX1200-NEXT: s_wait_bvhcnt 0x0
-; GFX1200-NEXT: s_wait_kmcnt 0x0
-; GFX1200-NEXT: v_mul_lo_u16 v4, v0, v1
-; GFX1200-NEXT: v_mad_u16 v0, v0, v1, v2
-; GFX1200-NEXT: ds_store_b16 v3, v4
-; GFX1200-NEXT: s_wait_dscnt 0x0
-; GFX1200-NEXT: s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: other_use_mul_mad_i16_var:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT: ds_store_b16_d16_hi v3, v0
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: other_use_mul_mad_i16_var:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX11-SDAG-FAKE16-NEXT: ds_store_b16 v3, v4
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: other_use_mul_mad_i16_var:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT: ds_store_b16_d16_hi v3, v0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: other_use_mul_mad_i16_var:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX11-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX11-GISEL-FAKE16-NEXT: ds_store_b16 v3, v4
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-TRUE16-LABEL: other_use_mul_mad_i16_var:
+; GFX1200-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.l, v1.l
+; GFX1200-SDAG-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, v2.l
+; GFX1200-SDAG-TRUE16-NEXT: ds_store_b16_d16_hi v3, v0
+; GFX1200-SDAG-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1200-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-SDAG-FAKE16-LABEL: other_use_mul_mad_i16_var:
+; GFX1200-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX1200-SDAG-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX1200-SDAG-FAKE16-NEXT: ds_store_b16 v3, v4
+; GFX1200-SDAG-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1200-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-TRUE16-LABEL: other_use_mul_mad_i16_var:
+; GFX1200-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: v_mul_lo_u16 v0.h, v0.l, v1.l
+; GFX1200-GISEL-TRUE16-NEXT: v_mad_u16 v0.l, v0.l, v1.l, v2.l
+; GFX1200-GISEL-TRUE16-NEXT: ds_store_b16_d16_hi v3, v0
+; GFX1200-GISEL-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1200-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1200-GISEL-FAKE16-LABEL: other_use_mul_mad_i16_var:
+; GFX1200-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_expcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_samplecnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_bvhcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: v_mul_lo_u16 v4, v0, v1
+; GFX1200-GISEL-FAKE16-NEXT: v_mad_u16 v0, v0, v1, v2
+; GFX1200-GISEL-FAKE16-NEXT: ds_store_b16 v3, v4
+; GFX1200-GISEL-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1200-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31]
entry:
%mul = mul i16 %x, %y
%add0 = add i16 %mul, %z
More information about the llvm-commits
mailing list