[llvm] [AMDGPU] Take BUF instructions into account in mayAccessScratchThroughFlat (PR #170274)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 08:14:39 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
BUF instructions can access the scratch address space, so the SIInsertWaitcnts pass needs to be able
to track the SCRATCH_WRITE_ACCESS event for them as well.
The release-vgprs.mir test had to be updated because BUF instructions without an MMO (MachineMemOperand) are now
tracked as a SCRATCH_WRITE_ACCESS. I added an MMO that accesses the global address space (addrspace 1) to each of them to keep the test results unchanged.
---
Full diff: https://github.com/llvm/llvm-project/pull/170274.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/release-vgprs.mir (+21-21)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index da019b6e476df..1ce3fe9b9eb65 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4400,7 +4400,8 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
}
bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
- if (!isFLAT(MI) || isFLATGlobal(MI))
+ // Instructions that access scratch use FLAT encoding or BUF encodings.
+ if ((!isFLAT(MI) && !isBUF(MI)) || isFLATGlobal(MI))
return false;
// If scratch is not initialized, we can never access it.
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index c845a4c82b9cc..821ea9c695a09 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -34,15 +34,15 @@ machineFunctionInfo:
body: |
bb.0:
; OPT-LABEL: name: tbuffer_store1
- ; OPT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+ ; OPT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; OPT-NEXT: S_NOP 0
; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
; OPT-NEXT: S_ENDPGM 0, implicit $vgpr97
;
; NOOPT-LABEL: name: tbuffer_store1
- ; NOOPT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+ ; NOOPT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; NOOPT-NEXT: S_ENDPGM 0, implicit $vgpr97
- TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+ TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
S_ENDPGM 0, implicit $vgpr97
...
@@ -107,15 +107,15 @@ machineFunctionInfo:
body: |
bb.0:
; OPT-LABEL: name: buffer_store_format
- ; OPT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+ ; OPT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; OPT-NEXT: S_NOP 0
; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
; OPT-NEXT: S_ENDPGM 0, implicit $vgpr97
;
; NOOPT-LABEL: name: buffer_store_format
- ; NOOPT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+ ; NOOPT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; NOOPT-NEXT: S_ENDPGM 0, implicit $vgpr97
- BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+ BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
S_ENDPGM 0, implicit $vgpr97
...
@@ -218,7 +218,7 @@ body: |
; OPT: bb.0:
; OPT-NEXT: successors: %bb.2(0x80000000)
; OPT-NEXT: {{ $}}
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
; OPT-NEXT: S_BRANCH %bb.2
; OPT-NEXT: {{ $}}
@@ -226,7 +226,7 @@ body: |
; OPT-NEXT: successors: %bb.2(0x80000000)
; OPT-NEXT: {{ $}}
; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; OPT-NEXT: S_BRANCH %bb.2
; OPT-NEXT: {{ $}}
; OPT-NEXT: bb.2:
@@ -238,7 +238,7 @@ body: |
; NOOPT: bb.0:
; NOOPT-NEXT: successors: %bb.2(0x80000000)
; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
; NOOPT-NEXT: S_BRANCH %bb.2
; NOOPT-NEXT: {{ $}}
@@ -246,7 +246,7 @@ body: |
; NOOPT-NEXT: successors: %bb.2(0x80000000)
; NOOPT-NEXT: {{ $}}
; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; NOOPT-NEXT: S_BRANCH %bb.2
; NOOPT-NEXT: {{ $}}
; NOOPT-NEXT: bb.2:
@@ -254,7 +254,7 @@ body: |
bb.0:
successors: %bb.2
- TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
$vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
S_BRANCH %bb.2
@@ -262,7 +262,7 @@ body: |
successors: %bb.2
$vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
S_BRANCH %bb.2
bb.2:
@@ -281,7 +281,7 @@ body: |
; OPT-NEXT: successors: %bb.2(0x80000000)
; OPT-NEXT: {{ $}}
; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; OPT-NEXT: S_BRANCH %bb.2
; OPT-NEXT: {{ $}}
; OPT-NEXT: bb.1:
@@ -311,7 +311,7 @@ body: |
; NOOPT-NEXT: successors: %bb.2(0x80000000)
; NOOPT-NEXT: {{ $}}
; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; NOOPT-NEXT: S_BRANCH %bb.2
; NOOPT-NEXT: {{ $}}
; NOOPT-NEXT: bb.1:
@@ -337,7 +337,7 @@ body: |
successors: %bb.2
$vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec
- TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
S_BRANCH %bb.2
bb.1:
@@ -408,14 +408,14 @@ body: |
; OPT: bb.0:
; OPT-NEXT: successors: %bb.1(0x80000000)
; OPT-NEXT: {{ $}}
- ; OPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ ; OPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1)
; OPT-NEXT: S_BRANCH %bb.1
; OPT-NEXT: {{ $}}
; OPT-NEXT: bb.1:
; OPT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; OPT-NEXT: {{ $}}
; OPT-NEXT: S_WAITCNT 1015
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+ ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; OPT-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
; OPT-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
; OPT-NEXT: S_BRANCH %bb.2
@@ -429,14 +429,14 @@ body: |
; NOOPT: bb.0:
; NOOPT-NEXT: successors: %bb.1(0x80000000)
; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ ; NOOPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1)
; NOOPT-NEXT: S_BRANCH %bb.1
; NOOPT-NEXT: {{ $}}
; NOOPT-NEXT: bb.1:
; NOOPT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; NOOPT-NEXT: {{ $}}
; NOOPT-NEXT: S_WAITCNT 1015
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+ ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
; NOOPT-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
; NOOPT-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
; NOOPT-NEXT: S_BRANCH %bb.2
@@ -446,13 +446,13 @@ body: |
bb.0:
successors: %bb.1
- renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile load (s32), addrspace 1)
S_BRANCH %bb.1
bb.1:
successors: %bb.1, %bb.2
- TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+ TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
S_CBRANCH_SCC1 %bb.1, implicit killed $scc
S_BRANCH %bb.2
``````````
</details>
https://github.com/llvm/llvm-project/pull/170274
More information about the llvm-commits
mailing list