[llvm] [AMDGPU] Remove TH_BYPASS from CPol (PR #139887)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 14 05:17:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (Shoreshen)
<details>
<summary>Changes</summary>
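1. Remove the TH_BYPASS enumerator from CPol.
2. Keep the ability to parse "BYPASS" as a TH value in the assembler (it now maps to TH_LU).
3. Stop printing "BYPASS" in the asm printer; the value is now printed as "LU" for loads and "WB" for stores.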
---
Patch is 60.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139887.diff
12 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+2-2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+2-3)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (-1)
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_features.s (+2-2)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt (+16-16)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mubuf.txt (+40-40)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt (+3-3)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 28370b8670f05..4023ce996e0b3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5086,7 +5086,7 @@ bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
(TH == AMDGPU::CPol::TH_NT_HT)))
return PrintError("invalid th value for SMEM instruction");
- if (TH == AMDGPU::CPol::TH_BYPASS) {
+ if (TH == AMDGPU::CPol::TH_WB) { // TH_LU == TH_WB == 3
if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
(Scope == AMDGPU::CPol::SCOPE_SYS &&
@@ -6774,7 +6774,7 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
.Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
.Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
.Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
- .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
+ .Case("BYPASS", AMDGPU::CPol::TH_LU)
.Default(0xffffffff);
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a56bca514aff3..b49889403bb88 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -206,9 +206,8 @@ void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
case AMDGPU::CPol::TH_HT:
O << "HT";
break;
- case AMDGPU::CPol::TH_BYPASS: // or LU or WB
- O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
- : (IsStore ? "WB" : "LU"));
+ case AMDGPU::CPol::TH_LU: // TH_LU == TH_WB == 3
+ O << (IsStore ? "WB" : "LU");
break;
case AMDGPU::CPol::TH_NT_RT:
O << "NT_RT";
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 0f603a43fd626..e75e71b064a38 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -379,7 +379,6 @@ enum CPol {
TH_RT_NT = 5, // regular (CU, SE), non-temporal (MALL)
TH_NT_HT = 6, // non-temporal (CU, SE), high-temporal (MALL)
TH_NT_WB = 7, // non-temporal (CU, SE), high-temporal with write-back (MALL)
- TH_BYPASS = 3, // only to be used with scope = 3
TH_RESERVED = 7, // unused value for load insts
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
index 2b10d469acf5c..6490320817a09 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
@@ -106,7 +106,7 @@ define amdgpu_kernel void @buffer_last_use_and_volatile_load(ptr addrspace(7) %i
; GFX12-NEXT: s_mov_b32 s13, s2
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
-; GFX12-NEXT: buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s13, s[4:5], 0x30
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x20
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index a00af8e5b6582..a1ae26bddca59 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -71,7 +71,7 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: v_mov_b32_e32 v1, s3
-; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index 5f952b98041f3..c51532353166f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -52,7 +52,7 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
index bc905fa564f8a..d12aa49052d4a 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
@@ -51,7 +51,7 @@ define amdgpu_kernel void @private_last_use_and_volatile_load(ptr addrspace(5) %
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
index ba1e0d6462ac8..6d83934687176 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
@@ -76,7 +76,7 @@ buffer_load_b32 v5, off, s[8:11], s3 offset:8388607 scope:SCOPE_DEV th:TH_LOAD_N
// GFX12: buffer_load_b32 v5, off, s[8:11], s3 offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x05,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f]
tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS
-// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 scope:SCOPE_SYS th:TH_LOAD_BYPASS
-// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
index 8d8cfc172ad75..541e302c76935 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
@@ -36,7 +36,7 @@
# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0x68,0x02,0x00,0xff,0xff,0x7f]
0x03,0x00,0x22,0xc4,0x04,0xe0,0x68,0x02,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0x84,0x02,0x00,0xff,0xff,0x7f]
@@ -78,7 +78,7 @@
# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_USCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0xe8,0x04,0x00,0xff,0xff,0x7f]
0x03,0x40,0x22,0xc4,0x04,0xe0,0xe8,0x04,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f]
0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0x04,0x05,0x00,0xff,0xff,0x7f]
@@ -120,7 +120,7 @@
# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_UNORM] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0x68,0x07,0x00,0xff,0xff,0x7f]
0x03,0x80,0x22,0xc4,0x04,0xe0,0x68,0x07,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f]
0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0x84,0x07,0x00,0xff,0xff,0x7f]
@@ -162,7 +162,7 @@
# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SINT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0xe8,0x09,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x22,0xc4,0x04,0xe0,0xe8,0x09,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x04,0x0a,0x00,0xff,0xff,0x7f]
@@ -204,7 +204,7 @@
# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_SNORM] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0x68,0x0c,0x00,0xff,0xff,0x7f]
0x03,0x00,0x20,0xc4,0x04,0xe0,0x68,0x0c,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f]
0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0x84,0x0c,0x00,0xff,0xff,0x7f]
@@ -246,7 +246,7 @@
# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_FLOAT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0xe8,0x0e,0x00,0xff,0xff,0x7f]
0x03,0x40,0x20,0xc4,0x04,0xe0,0xe8,0x0e,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f]
0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0x04,0x0f,0x00,0xff,0xff,0x7f]
@@ -288,7 +288,7 @@
# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_UINT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0x68,0x11,0x00,0xff,0xff,0x7f]
0x03,0x80,0x20,0xc4,0x04,0xe0,0x68,0x11,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f]
0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0x84,0x11,0x00,0xff,0xff,0x7f]
@@ -330,7 +330,7 @@
# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_SSCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0xe8,0x13,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x20,0xc4,0x04,0xe0,0xe8,0x13,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x04,0x14,0x00,0xff,0xff,0x7f]
@@ -372,7 +372,7 @@
# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_USCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0x68,0x16,0x00,0xff,0xff,0x7f]
0x03,0x00,0x23,0xc4,0x04,0xe0,0x68,0x16,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f]
0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0x84,0x16,0x00,0xff,0xff,0x7f]
@@ -414,7 +414,7 @@
# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_SINT] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0xe8,0x18,0x00,0xff,0xff,0x7f]
0x03,0x40,0x23,0xc4,0x04,0xe0,0xe8,0x18,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f]
0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0x04,0x19,0x00,0xff,0xff,0x7f]
@@ -456,7 +456,7 @@
# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_SSCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0x68,0x1b,0x00,0xff,0xff,0x7f]
0x03,0x80,0x23,0xc4,0x04,0xe0,0x68,0x1b,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f]
0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0x84,0x1b,0x00,0xff,0xff,0x7f]
@@ -498,7 +498,7 @@
# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_SINT] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0xe8,0x1d,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x23,0xc4,0x04,0xe0,0xe8,0x1d,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x04,0x1e,0x00,0xff,0xff,0x7f]
@@ -540,7 +540,7 @@
# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_USCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x21,0xc4,0x04,0xe0,0xe8,0x01,0x00,0xff,0xff,0x7f]
0x03,0x00,0x21,0xc4,0x04,0xe0,0xe8,0x01,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f]
0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/139887
More information about the llvm-commits
mailing list