[llvm] [AMDGPU] Extend getMemOperandAACheckLimit (PR #172084)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 13:18:43 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Gang Chen (cmc-rep)
<details>
<summary>Changes</summary>
---
Patch is 96.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172084.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2)
- (modified) llvm/test/CodeGen/AMDGPU/scratch-simple.ll (+316-323)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b1d6563bf3c0b..f0ebd4f531d1b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -440,6 +440,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
+ unsigned getMemOperandAACheckLimit() const override { return 32; }
+
unsigned getMachineCSELookAheadLimit() const override { return 500; }
MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index c253f42e0d3c8..21459ed620071 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -98,25 +98,20 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
-; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
; SI-NEXT: s_waitcnt expcnt(3)
; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
-; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
-; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
-; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
-; SI-NEXT: s_waitcnt expcnt(2)
-; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
-; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
-; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
-; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
-; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -125,6 +120,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -214,23 +214,18 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
-; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
-; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
-; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
-; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
-; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
-; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
-; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
-; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
-; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -238,6 +233,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -324,25 +324,19 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
-; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
-; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:832
; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:828
; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:824
; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:820
; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499
-; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
-; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:792
-; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:788
-; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
-; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
-; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
-; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
-; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:784
; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:780
; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:776
@@ -350,6 +344,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:768
; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:764
; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:800
; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:760
; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:756
; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:752
@@ -377,21 +376,14 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
-; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000
-; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4
-; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
-; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
-; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
-; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
-; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
@@ -423,7 +415,9 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
@@ -434,6 +428,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
@@ -488,21 +487,14 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e
-; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000
-; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4
-; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3
; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:320
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:316
-; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
-; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
-; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
-; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:292
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:288
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284
@@ -534,7 +526,9 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2
; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:228
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:224
@@ -545,6 +539,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:208
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:204
; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:300
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:296
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:200
; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:196
@@ -1102,25 +1101,20 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
; SI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
-; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
; SI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT: s_waitcnt expcnt(2)
+; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
; SI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
; SI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
; SI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
; SI-NEXT: s_waitcnt expcnt(3)
; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499
-; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
-; SI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
-; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
-; SI-NEXT: s_waitcnt expcnt(2)
-; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
-; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
-; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
-; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
-; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
; SI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -1129,6 +1123,11 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; SI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
; SI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
; SI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -1218,23 +1217,18 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:224
; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:216
; VI-NEXT: buffer_store_dword v14, off, s[4:7], 0 offset:196
-; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
; VI-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:832
; VI-NEXT: buffer_store_dword v11, off, s[4:7], 0 offset:828
; VI-NEXT: buffer_store_dword v10, off, s[4:7], 0 offset:824
; VI-NEXT: buffer_store_dword v9, off, s[4:7], 0 offset:820
; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499
-; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:796
-; VI-NEXT: buffer_store_dword v13, off, s[4:7], 0 offset:792
-; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:788
-; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
-; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
-; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
-; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
-; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:784
; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:780
; VI-NEXT: buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -1242,6 +1236,11 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; VI-NEXT: buffer_store_dword v16, off, s[4:7], 0 offset:768
; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:764
; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT: buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT: buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT: buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT: buffer_store_dword v12, off, s[4:7], 0 offset:800
; VI-NEXT: buffer_store_dword v5, off, s[4:7], 0 offset:760
; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:756
; VI-NEXT: buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -1328,25 +1327,19 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:224
; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:216
; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:196
-; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c
; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x200, v0
; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
-; GFX9-MUBUF-NEXT: s_nop 0
+; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT: buffe...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/172084
More information about the llvm-commits mailing list