[llvm] [AMDGPU][True16][CodeGen] flat/global/scratch load/store pseudo for true16 (PR #127945)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 08:33:15 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
Changes:
Added true16 load/store pseudo instructions for:
- flat_store (flat_load is already in place)
- global_load/global_store
- scratch_load/scratch_store

and updated the affected CodeGen test files.
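For context, a minimal IR sketch (illustrative only, not taken from the patch; the function and value names are made up) of the kind of 16-bit accesses these pseudos cover. With real true16 enabled, the expectation is that the data operand of such accesses can be selected into a 16-bit VGPR half (VGPR_16, i.e. v*.l / v*.h) via the new `*_t16` pseudos instead of occupying a full 32-bit VGPR:

```llvm
; Illustrative only -- not part of the patch. With +real-true16, the i16
; stores below should map onto the new _t16 store pseudos (flat_store_short,
; global_store_short, scratch_store_short variants) defined in the diff.
define void @store_i16_all_spaces(ptr %flat, ptr addrspace(1) %global,
                                  ptr addrspace(5) %scratch, i16 %v) {
  store i16 %v, ptr %flat                  ; flat address space
  store i16 %v, ptr addrspace(1) %global   ; global address space
  store i16 %v, ptr addrspace(5) %scratch  ; scratch (private) address space
  ret void
}
```

The pseudo names follow the existing naming convention, e.g. FLAT_STORE_SHORT_t16 and GLOBAL_STORE_SHORT_SADDR_t16, as defined in the diff below.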
---
Patch is 132.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127945.diff
11 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+222-32)
- (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+219-143)
- (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+155-73)
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (+278-125)
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+206-100)
- (modified) llvm/test/CodeGen/AMDGPU/fmed3.ll (+4-7)
- (modified) llvm/test/CodeGen/AMDGPU/icmp.i16.ll (+20-20)
- (modified) llvm/test/CodeGen/AMDGPU/mad.u16.ll (+5-10)
- (modified) llvm/test/CodeGen/AMDGPU/minimummaximum.ll (+22-10)
- (modified) llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll (+6-11)
- (modified) llvm/test/CodeGen/AMDGPU/v_pack.ll (+12-24)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7988a9ac0ce55..f48d1d8c011da 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -255,6 +255,12 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
let enabled_saddr = EnableSaddr;
}
+multiclass FLAT_Store_Pseudo_t16<string opName> {
+ def "" : FLAT_Store_Pseudo<opName, VGPR_32>;
+ let OtherPredicates = [HasTrue16BitInsts] in
+ def _t16 : FLAT_Store_Pseudo<opName#"_t16", VGPR_16>, True16D16Table<NAME#"_D16_HI", NAME>;
+}
+
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
let is_flat_global = 1 in {
def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
@@ -264,6 +270,21 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
}
+multiclass FLAT_Global_Load_Pseudo_t16<string opName> {
+ defm "" : FLAT_Global_Load_Pseudo<opName, VGPR_32, 1>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts],
+ SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
+ def _t16 : FLAT_Load_Pseudo<Name16, VGPR_16, 0, 1>,
+ GlobalSaddrTable<0, Name16>,
+ True16D16Table<NAME#"_HI", NAME>;
+ def _SADDR_t16 : FLAT_Load_Pseudo<Name16, VGPR_16, 0, 1, 1>,
+ GlobalSaddrTable<1, Name16>,
+ True16D16Table<NAME#"_HI_SADDR", NAME#"_SADDR">;
+ }
+}
+
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
@@ -300,6 +321,21 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
}
+multiclass FLAT_Global_Store_Pseudo_t16<string opName> {
+ defm "" : FLAT_Global_Store_Pseudo<opName, VGPR_32>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts],
+ SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
+ def _t16 : FLAT_Store_Pseudo<Name16, VGPR_16, 1>,
+ GlobalSaddrTable<0, Name16>,
+ True16D16Table<NAME#"_D16_HI", NAME>;
+ def _SADDR_t16 : FLAT_Store_Pseudo<Name16, VGPR_16, 1, 1>,
+ GlobalSaddrTable<1, Name16>,
+ True16D16Table<NAME#"_D16_HI_SADDR", NAME#"_SADDR">;
+ }
+}
+
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs ),
@@ -456,6 +492,29 @@ multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit H
FlatScratchInst<opName, "ST">;
}
+multiclass FLAT_Scratch_Load_Pseudo_t16<string opName> {
+ defm "" : FLAT_Scratch_Load_Pseudo<opName, VGPR_32, 1>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts], is_flat_scratch = 1 in {
+ def _t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0>,
+ FlatScratchInst<Name16, "SV">,
+ True16D16Table<NAME#"_HI", NAME>;
+ def _SADDR_t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0, 1>,
+ FlatScratchInst<Name16, "SS">,
+ True16D16Table<NAME#"_HI_SADDR", NAME#"_SADDR">;
+ let SubtargetPredicate = HasFlatScratchSVSMode in
+ def _SVS_t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0, 1, 1>,
+ FlatScratchInst<Name16, "SVS">,
+ True16D16Table<NAME#"_HI_SVS", NAME#"_SVS">;
+
+ let SubtargetPredicate = HasFlatScratchSTMode in
+ def _ST_t16 : FLAT_Scratch_Load_Pseudo<Name16, VGPR_16, 0, 0, 0, 0>,
+ FlatScratchInst<Name16, "ST">,
+ True16D16Table<NAME#"_HI_ST", NAME#"_ST">;
+ }
+}
+
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>,
FlatScratchInst<opName, "SV">;
@@ -471,6 +530,31 @@ multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
FlatScratchInst<opName, "ST">;
}
+multiclass FLAT_Scratch_Store_Pseudo_t16<string opName> {
+ defm "" : FLAT_Scratch_Store_Pseudo<opName, VGPR_32>;
+
+ defvar Name16 = opName#"_t16";
+ let OtherPredicates = [HasTrue16BitInsts], is_flat_scratch = 1 in {
+ def _t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16>,
+ FlatScratchInst<Name16, "SV">,
+ True16D16Table<NAME#"_D16_HI", NAME>;
+ def _SADDR_t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16, 1>,
+ FlatScratchInst<Name16, "SS">,
+ True16D16Table<NAME#"_D16_HI_SADDR", NAME#"_SADDR">;
+
+ let SubtargetPredicate = HasFlatScratchSVSMode in
+ def _SVS_t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16, 1, 1>,
+ FlatScratchInst<Name16, "SVS">,
+ True16D16Table<NAME#"_D16_HI_SVS", NAME#"_SVS">;
+
+ let SubtargetPredicate = HasFlatScratchSTMode in
+ def _ST_t16 : FLAT_Scratch_Store_Pseudo<Name16, VGPR_16, 0, 0, 0>,
+ FlatScratchInst<Name16, "ST">,
+ True16D16Table<NAME#"_D16_HI_ST", NAME#"_ST">;
+ }
+}
+
+
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
bit EnableSVE = 0,
bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
@@ -665,8 +749,6 @@ def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
-def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
-def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
@@ -686,6 +768,9 @@ def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}
+defm FLAT_STORE_BYTE : FLAT_Store_Pseudo_t16 <"flat_store_byte">;
+defm FLAT_STORE_SHORT : FLAT_Store_Pseudo_t16 <"flat_store_short">;
+
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
VGPR_32, i32, v2i32, VReg_64>;
@@ -834,19 +919,22 @@ defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;
let TiedSourceNotRead = 1 in {
-defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
-defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
-defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
-defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
+defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
+defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo_t16 <"global_load_sbyte_d16">;
+defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo_t16 <"global_load_short_d16">;
+defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo_t16 <"global_load_ubyte_d16">;
}
+defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
+defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
+
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;
-defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
-defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
+defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo_t16 <"global_store_byte">;
+defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo_t16 <"global_store_short">;
defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
@@ -854,9 +942,6 @@ defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VR
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;
-defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
-defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;
-
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
VGPR_32, i32, v2i32, VReg_64>;
@@ -970,24 +1055,24 @@ defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", V
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;
let TiedSourceNotRead = 1 in {
-defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
-defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
-defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;
+defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo_t16 <"scratch_load_ubyte_d16">;
+defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo_t16 <"scratch_load_sbyte_d16">;
+defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo_t16 <"scratch_load_short_d16">;
}
-defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
-defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
+defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
+defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
+
+defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo_t16 <"scratch_store_byte">;
+defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo_t16 <"scratch_store_short">;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;
-defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
-defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;
-
defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
@@ -1071,11 +1156,21 @@ class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType
(inst $vaddr, $offset, 0, $in)
>;
+class FlatSignedLoadPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
+ (inst $vaddr, $offset, (i32 0))
+>;
+
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
(inst $saddr, $voffset, $offset, 0, $in)
>;
+class GlobalLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
+ (inst $saddr, $voffset, $offset, (i32 0))
+>;
+
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
(inst $vaddr, $offset)
@@ -1208,6 +1303,11 @@ class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
(inst $vaddr, $offset, 0, $in)
>;
+class ScratchLoadSignedPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
+ (inst $vaddr, $offset, 0)
+>;
+
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
@@ -1223,6 +1323,11 @@ class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueTy
(inst $saddr, $offset, 0, $in)
>;
+class ScratchLoadSaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
+ (inst $saddr, $offset, 0)
+>;
+
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
(node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
@@ -1245,6 +1350,11 @@ class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueT
(inst $vaddr, $saddr, $offset, 0, $in)
>;
+class ScratchLoadSVaddrPat_D16_t16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
+ (inst $vaddr, $saddr, $offset, 0)
+>;
+
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : FlatLoadSignedPat <inst, node, vt> {
let AddedComplexity = 10;
@@ -1265,6 +1375,16 @@ multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Valu
}
}
+multiclass GlobalFLATLoadPats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
+ def : FlatSignedLoadPat_D16_t16<!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalLoadSaddrPat_D16_t16<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> {
def : FlatStoreSignedPat <inst, node, vt> {
@@ -1276,6 +1396,16 @@ multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
}
}
+multiclass GlobalFLATStorePats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
+ def : FlatStoreSignedPat<!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 10;
+ }
+
+ def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 11;
+ }
+}
+
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt,
ValueType data_vt = vt> {
let AddedComplexity = 11 in
@@ -1358,6 +1488,22 @@ multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
}
}
+multiclass ScratchFLATStorePats_t16<string inst, SDPatternOperator node,
+ ValueType vt> {
+ def : ScratchStoreSignedPat <!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 26;
+ }
+
+ def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(inst#"_SVS_t16"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
+}
+
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
def : ScratchLoadSignedPat_D16 <inst, node, vt> {
let AddedComplexity = 25;
@@ -1373,6 +1519,21 @@ multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, Val
}
}
+multiclass ScratchFLATLoadPats_D16_t16<string inst, SDPatternOperator node, ValueType vt> {
+ def : ScratchLoadSignedPat_D16_t16 <!cast<FLAT_Pseudo>(inst#"_t16"), node, vt> {
+ let AddedComplexity = 25;
+ }
+
+ def : ScratchLoadSaddrPat_D16_t16<!cast<FLAT_Pseudo>(inst#"_SADDR_t16"), node, vt> {
+ let AddedComplexity = 26;
+ }
+
+ def : ScratchLoadSVaddrPat_D16_t16 <!cast<FLAT_Pseudo>(inst#"_SVS_t16"), node, vt> {
+ let SubtargetPredicate = HasFlatScratchSVSMode;
+ let AddedComplexity = 27;
+ }
+}
+
let OtherPredicates = [HasFlatAddressSpace] in {
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
@@ -1409,6 +1570,8 @@ let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predi
def : FlatLoadPat_D16_t16<FLAT_LOAD_UBYTE_D16_t16, zextloadi8_flat, i16>;
def : FlatLoadPat_D16_t16<FLAT_LOAD_SBYTE_D16_t16, sextloadi8_flat, i16>;
def : FlatLoadPat_D16_t16<FLAT_LOAD_SHORT_D16_t16, load_flat, i16>;
+ def : FlatStorePat <FLAT_STORE_BYTE_t16, truncstorei8_flat, i16>;
+ def : FlatStorePat <FLAT_STORE_SHORT_t16, store_flat, i16>;
} // End let OtherPredicates = [D16PreservesUnusedBits, HasFlatAddressSpace], True16Predicate = UseRealTrue16Insts
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
@@ -1489,9 +1652,6 @@ let SubtargetPredicate = isGFX12Plus in {
defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>;
}
-def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
-def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;
-
let OtherPredicates = [HasD16LoadStore] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
@@ -1531,15 +1691,28 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
-defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
-defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
-defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
+
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let OtherPredicates = [HasFlatGlobalInsts], True16Predicate = p in {
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
+defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;
+}
+
+let OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts in {
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", extloadi8_global, i16>;
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_UBYTE_D16", zextloadi8_global, i16>;
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SBYTE_D16", sextloadi8_global, i16>;
+defm : GlobalFLATLoadPats_D16_t16<"GLOBAL_LOAD_SHORT_D16", load_global, i16>;
+defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_BYTE", truncstorei8_global, i16>;
+defm : GlobalFLATStorePats_D16_t16<"GLOBAL_STORE_SHORT", store_global, i16>;
+} // end OtherPredicates = [HasFlatGlobalInsts, D16PreservesUnusedBits], True16Predicate = UseRealTrue16Insts
foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
@@ -1565,11 +1738,15 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_globa...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/127945