[llvm] [AMDGPU] Remove TH_BYPASS from CPol (PR #139887)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 14 05:17:34 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: None (Shoreshen)

<details>
<summary>Changes</summary>

1. Remove TH_BYPASS from CPol
2. Keep the ability to parse "BYPASS" as a TH value
3. Stop printing "BYPASS" in the asm printer

---

Patch is 60.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139887.diff


12 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+2-2) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+2-3) 
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (-1) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll (+1-1) 
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_features.s (+2-2) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt (+16-16) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mubuf.txt (+40-40) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt (+3-3) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt (+2-2) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 28370b8670f05..4023ce996e0b3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5086,7 +5086,7 @@ bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
        (TH == AMDGPU::CPol::TH_NT_HT)))
     return PrintError("invalid th value for SMEM instruction");
 
-  if (TH == AMDGPU::CPol::TH_BYPASS) {
+  if (TH == AMDGPU::CPol::TH_WB) { // TH_LU == TH_WB == 3
     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
         (Scope == AMDGPU::CPol::SCOPE_SYS &&
@@ -6774,7 +6774,7 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
-                .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
+                .Case("BYPASS", AMDGPU::CPol::TH_LU)
                 .Default(0xffffffff);
   }
 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a56bca514aff3..b49889403bb88 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -206,9 +206,8 @@ void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
       case AMDGPU::CPol::TH_HT:
         O << "HT";
         break;
-      case AMDGPU::CPol::TH_BYPASS: // or LU or WB
-        O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
-                                               : (IsStore ? "WB" : "LU"));
+      case AMDGPU::CPol::TH_LU: // TH_LU == TH_WB == 3
+        O << (IsStore ? "WB" : "LU");
         break;
       case AMDGPU::CPol::TH_NT_RT:
         O << "NT_RT";
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 0f603a43fd626..e75e71b064a38 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -379,7 +379,6 @@ enum CPol {
   TH_RT_NT = 5,  // regular (CU, SE), non-temporal (MALL)
   TH_NT_HT = 6,  // non-temporal (CU, SE), high-temporal (MALL)
   TH_NT_WB = 7,  // non-temporal (CU, SE), high-temporal with write-back (MALL)
-  TH_BYPASS = 3, // only to be used with scope = 3
 
   TH_RESERVED = 7, // unused value for load insts
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
index 2b10d469acf5c..6490320817a09 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
@@ -106,7 +106,7 @@ define amdgpu_kernel void @buffer_last_use_and_volatile_load(ptr addrspace(7) %i
 ; GFX12-NEXT:    s_mov_b32 s13, s2
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
-; GFX12-NEXT:    buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT:    buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_LU scope:SCOPE_SYS
 ; GFX12-NEXT:    s_clause 0x1
 ; GFX12-NEXT:    s_load_b32 s13, s[4:5], 0x30
 ; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x20
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index a00af8e5b6582..a1ae26bddca59 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -71,7 +71,7 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX12-NEXT:    v_mov_b32_e32 v1, s3
-; GFX12-NEXT:    flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT:    flat_load_b32 v2, v[0:1] th:TH_LOAD_LU scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index 5f952b98041f3..c51532353166f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -52,7 +52,7 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
 ; GFX12-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
 ; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT:    global_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
index bc905fa564f8a..d12aa49052d4a 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
@@ -51,7 +51,7 @@ define amdgpu_kernel void @private_last_use_and_volatile_load(ptr addrspace(5) %
 ; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
 ; GFX12-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    scratch_load_b32 v1, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT:    scratch_load_b32 v1, off, s2 th:TH_LOAD_LU scope:SCOPE_SYS
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
index ba1e0d6462ac8..6d83934687176 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
@@ -76,7 +76,7 @@ buffer_load_b32 v5, off, s[8:11], s3 offset:8388607 scope:SCOPE_DEV th:TH_LOAD_N
 // GFX12: buffer_load_b32 v5, off, s[8:11], s3 offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x05,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f]
 
 tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS
-// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
 
 tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 scope:SCOPE_SYS th:TH_LOAD_BYPASS
-// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
index 8d8cfc172ad75..541e302c76935 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
@@ -36,7 +36,7 @@
 # GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x00,0x22,0xc4,0x04,0xe0,0x68,0x02,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x22,0xc4,0x04,0xe0,0x68,0x02,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x00,0x22,0xc4,0x04,0xe0,0x84,0x02,0x00,0xff,0xff,0x7f]
@@ -78,7 +78,7 @@
 # GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_USCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x40,0x22,0xc4,0x04,0xe0,0xe8,0x04,0x00,0xff,0xff,0x7f]
 0x03,0x40,0x22,0xc4,0x04,0xe0,0xe8,0x04,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f]
 0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x40,0x22,0xc4,0x04,0xe0,0x04,0x05,0x00,0xff,0xff,0x7f]
@@ -120,7 +120,7 @@
 # GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_UNORM] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x80,0x22,0xc4,0x04,0xe0,0x68,0x07,0x00,0xff,0xff,0x7f]
 0x03,0x80,0x22,0xc4,0x04,0xe0,0x68,0x07,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f]
 0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x80,0x22,0xc4,0x04,0xe0,0x84,0x07,0x00,0xff,0xff,0x7f]
@@ -162,7 +162,7 @@
 # GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SINT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0xc0,0x22,0xc4,0x04,0xe0,0xe8,0x09,0x00,0xff,0xff,0x7f]
 0x03,0xc0,0x22,0xc4,0x04,0xe0,0xe8,0x09,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f]
 0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x04,0x0a,0x00,0xff,0xff,0x7f]
@@ -204,7 +204,7 @@
 # GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_SNORM] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x00,0x20,0xc4,0x04,0xe0,0x68,0x0c,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x20,0xc4,0x04,0xe0,0x68,0x0c,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x00,0x20,0xc4,0x04,0xe0,0x84,0x0c,0x00,0xff,0xff,0x7f]
@@ -246,7 +246,7 @@
 # GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_FLOAT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x40,0x20,0xc4,0x04,0xe0,0xe8,0x0e,0x00,0xff,0xff,0x7f]
 0x03,0x40,0x20,0xc4,0x04,0xe0,0xe8,0x0e,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f]
 0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x40,0x20,0xc4,0x04,0xe0,0x04,0x0f,0x00,0xff,0xff,0x7f]
@@ -288,7 +288,7 @@
 # GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_UINT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x80,0x20,0xc4,0x04,0xe0,0x68,0x11,0x00,0xff,0xff,0x7f]
 0x03,0x80,0x20,0xc4,0x04,0xe0,0x68,0x11,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f]
 0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x80,0x20,0xc4,0x04,0xe0,0x84,0x11,0x00,0xff,0xff,0x7f]
@@ -330,7 +330,7 @@
 # GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_SSCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0xc0,0x20,0xc4,0x04,0xe0,0xe8,0x13,0x00,0xff,0xff,0x7f]
 0x03,0xc0,0x20,0xc4,0x04,0xe0,0xe8,0x13,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS 		; encoding:  [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f]
 0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x04,0x14,0x00,0xff,0xff,0x7f]
@@ -372,7 +372,7 @@
 # GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_USCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x00,0x23,0xc4,0x04,0xe0,0x68,0x16,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x23,0xc4,0x04,0xe0,0x68,0x16,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x00,0x23,0xc4,0x04,0xe0,0x84,0x16,0x00,0xff,0xff,0x7f]
@@ -414,7 +414,7 @@
 # GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_SINT] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x40,0x23,0xc4,0x04,0xe0,0xe8,0x18,0x00,0xff,0xff,0x7f]
 0x03,0x40,0x23,0xc4,0x04,0xe0,0xe8,0x18,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS 		; encoding:  [0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f]
 0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x40,0x23,0xc4,0x04,0xe0,0x04,0x19,0x00,0xff,0xff,0x7f]
@@ -456,7 +456,7 @@
 # GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_SSCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x80,0x23,0xc4,0x04,0xe0,0x68,0x1b,0x00,0xff,0xff,0x7f]
 0x03,0x80,0x23,0xc4,0x04,0xe0,0x68,0x1b,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS 		; encoding:  [0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f]
 0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0x80,0x23,0xc4,0x04,0xe0,0x84,0x1b,0x00,0xff,0xff,0x7f]
@@ -498,7 +498,7 @@
 # GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_SINT] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0xc0,0x23,0xc4,0x04,0xe0,0xe8,0x1d,0x00,0xff,0xff,0x7f]
 0x03,0xc0,0x23,0xc4,0x04,0xe0,0xe8,0x1d,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS 		; encoding:  [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f]
 0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 scope:SCOPE_SE 		; encoding:  [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x04,0x1e,0x00,0xff,0xff,0x7f]
@@ -540,7 +540,7 @@
 # GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_USCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV 		; encoding:  [0x03,0x00,0x21,0xc4,0x04,0xe0,0xe8,0x01,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x21,0xc4,0x04,0xe0,0xe8,0x01,0x00,0xff,0xff,0x7f
 
-# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS 		; encoding:  [0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f]
 0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f
 
 # GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/139887


More information about the llvm-commits mailing list