[llvm] [AMDGPU] Extend getMemOperandAACheckLimit (PR #172084)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 13:18:43 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Gang Chen (cmc-rep)

<details>
<summary>Changes</summary>



---

Patch is 96.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172084.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/scratch-simple.ll (+316-323) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b1d6563bf3c0b..f0ebd4f531d1b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -440,6 +440,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                      MachineRegisterInfo *MRI) const final;
 
+  unsigned getMemOperandAACheckLimit() const override { return 32; }
+
   unsigned getMachineCSELookAheadLimit() const override { return 500; }
 
   MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index c253f42e0d3c8..21459ed620071 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -98,25 +98,20 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; SI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:224
 ; SI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:216
 ; SI-NEXT:    buffer_store_dword v14, off, s[4:7], 0 offset:196
-; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
 ; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
 ; SI-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
 ; SI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:832
 ; SI-NEXT:    buffer_store_dword v11, off, s[4:7], 0 offset:828
 ; SI-NEXT:    buffer_store_dword v10, off, s[4:7], 0 offset:824
 ; SI-NEXT:    buffer_store_dword v9, off, s[4:7], 0 offset:820
 ; SI-NEXT:    s_waitcnt expcnt(3)
 ; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
-; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
-; SI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
-; SI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
-; SI-NEXT:    s_waitcnt expcnt(2)
-; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
-; SI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
-; SI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
-; SI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
-; SI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
-; SI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:784
 ; SI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:780
 ; SI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -125,6 +120,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; SI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:764
 ; SI-NEXT:    s_waitcnt expcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; SI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:760
 ; SI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:756
 ; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -214,23 +214,18 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; VI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:224
 ; VI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:216
 ; VI-NEXT:    buffer_store_dword v14, off, s[4:7], 0 offset:196
-; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
 ; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
 ; VI-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
 ; VI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:832
 ; VI-NEXT:    buffer_store_dword v11, off, s[4:7], 0 offset:828
 ; VI-NEXT:    buffer_store_dword v10, off, s[4:7], 0 offset:824
 ; VI-NEXT:    buffer_store_dword v9, off, s[4:7], 0 offset:820
 ; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
-; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
-; VI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
-; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
-; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
-; VI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
-; VI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
-; VI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
-; VI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
-; VI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:784
 ; VI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:780
 ; VI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -238,6 +233,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; VI-NEXT:    buffer_store_dword v16, off, s[4:7], 0 offset:768
 ; VI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:764
 ; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; VI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:760
 ; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:756
 ; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -324,25 +324,19 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:224
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:216
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:196
-; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
 ; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
 ; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x200, v0
 ; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
-; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:792
+; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:788
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:832
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], 0 offset:828
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:824
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], 0 offset:820
 ; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3703c499
-; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:796
-; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:792
-; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:788
-; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:816
-; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:812
-; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:808
-; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:804
-; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:800
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:784
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:780
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:776
@@ -350,6 +344,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v16, off, s[0:3], 0 offset:768
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:764
 ; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; GFX9-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:816
+; GFX9-MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], 0 offset:812
+; GFX9-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:808
+; GFX9-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:804
+; GFX9-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:800
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:760
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:756
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:752
@@ -377,21 +376,14 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s2, -1
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
-; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
 ; GFX10_W32-MUBUF-NEXT:    s_mov_b32 s3, 0x31c16000
-; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
 ; GFX10_W32-MUBUF-NEXT:    s_add_u32 s0, s0, s4
-; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
 ; GFX10_W32-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:320
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:316
-; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:312
-; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:308
-; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:304
-; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:300
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:292
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:288
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:284
@@ -423,7 +415,9 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
 ; GFX10_W32-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:228
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:224
@@ -434,6 +428,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:208
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:204
 ; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W32-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:300
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:296
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:200
 ; GFX10_W32-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:196
@@ -488,21 +487,14 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s2, -1
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v1, 0xbf20e7f4
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v2, 0x3f3d349e
-; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
 ; GFX10_W64-MUBUF-NEXT:    s_mov_b32 s3, 0x31e16000
-; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
 ; GFX10_W64-MUBUF-NEXT:    s_add_u32 s0, s0, s4
-; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v7, 0xbefcd8a3
 ; GFX10_W64-MUBUF-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v8, 0xbefcd89f
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v9, 0xbeae29dc
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:320
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:316
-; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:312
-; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:308
-; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:304
-; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:300
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:292
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:288
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], 0 offset:284
@@ -534,7 +526,9 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v15, 0xbf5f2ee3
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v12, 0xbf3d349e
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0xbf523be3
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v3, 0x3f523be1
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v16, 0xbf638e39
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v4, 0x3f5f2ee2
 ; GFX10_W64-MUBUF-NEXT:    v_and_b32_e32 v0, 0x1fc, v0
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], 0 offset:228
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:224
@@ -545,6 +539,11 @@ define amdgpu_ps float @ps_main(i32 %idx) {
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:208
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:204
 ; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v14, 0x3f20e7f4
+; GFX10_W64-MUBUF-NEXT:    v_mov_b32_e32 v5, 0x3f638e37
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:312
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:308
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:304
+; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:300
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:296
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], 0 offset:200
 ; GFX10_W64-MUBUF-NEXT:    buffer_store_dword v17, off, s[0:3], 0 offset:196
@@ -1102,25 +1101,20 @@ define amdgpu_vs float @vs_main(i32 %idx) {
 ; SI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:224
 ; SI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:216
 ; SI-NEXT:    buffer_store_dword v14, off, s[4:7], 0 offset:196
-; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
 ; SI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
 ; SI-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen
+; SI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
+; SI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
+; SI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
+; SI-NEXT:    s_waitcnt expcnt(2)
+; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
 ; SI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:832
 ; SI-NEXT:    buffer_store_dword v11, off, s[4:7], 0 offset:828
 ; SI-NEXT:    buffer_store_dword v10, off, s[4:7], 0 offset:824
 ; SI-NEXT:    buffer_store_dword v9, off, s[4:7], 0 offset:820
 ; SI-NEXT:    s_waitcnt expcnt(3)
 ; SI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
-; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
-; SI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
-; SI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
-; SI-NEXT:    s_waitcnt expcnt(2)
-; SI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
-; SI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
-; SI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
-; SI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
-; SI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
-; SI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:784
 ; SI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:780
 ; SI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -1129,6 +1123,11 @@ define amdgpu_vs float @vs_main(i32 %idx) {
 ; SI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:764
 ; SI-NEXT:    s_waitcnt expcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; SI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
+; SI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
+; SI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
+; SI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
+; SI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; SI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:760
 ; SI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:756
 ; SI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -1218,23 +1217,18 @@ define amdgpu_vs float @vs_main(i32 %idx) {
 ; VI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:224
 ; VI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:216
 ; VI-NEXT:    buffer_store_dword v14, off, s[4:7], 0 offset:196
-; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
 ; VI-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
 ; VI-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen
+; VI-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
+; VI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
+; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
+; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
 ; VI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:832
 ; VI-NEXT:    buffer_store_dword v11, off, s[4:7], 0 offset:828
 ; VI-NEXT:    buffer_store_dword v10, off, s[4:7], 0 offset:824
 ; VI-NEXT:    buffer_store_dword v9, off, s[4:7], 0 offset:820
 ; VI-NEXT:    v_mov_b32_e32 v17, 0x3703c499
-; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:796
-; VI-NEXT:    buffer_store_dword v13, off, s[4:7], 0 offset:792
-; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:788
-; VI-NEXT:    v_mov_b32_e32 v18, 0xbf523be1
-; VI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
-; VI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
-; VI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
-; VI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
-; VI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:784
 ; VI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:780
 ; VI-NEXT:    buffer_store_dword v15, off, s[4:7], 0 offset:776
@@ -1242,6 +1236,11 @@ define amdgpu_vs float @vs_main(i32 %idx) {
 ; VI-NEXT:    buffer_store_dword v16, off, s[4:7], 0 offset:768
 ; VI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:764
 ; VI-NEXT:    v_mov_b32_e32 v5, 0xbf5f2ee2
+; VI-NEXT:    buffer_store_dword v17, off, s[4:7], 0 offset:816
+; VI-NEXT:    buffer_store_dword v8, off, s[4:7], 0 offset:812
+; VI-NEXT:    buffer_store_dword v7, off, s[4:7], 0 offset:808
+; VI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:804
+; VI-NEXT:    buffer_store_dword v12, off, s[4:7], 0 offset:800
 ; VI-NEXT:    buffer_store_dword v5, off, s[4:7], 0 offset:760
 ; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0 offset:756
 ; VI-NEXT:    buffer_store_dword v18, off, s[4:7], 0 offset:752
@@ -1328,25 +1327,19 @@ define amdgpu_vs float @vs_main(i32 %idx) {
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], 0 offset:224
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], 0 offset:216
 ; GFX9-MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], 0 offset:196
-; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
 ; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v18, 0x3f3d349c
 ; GFX9-MUBUF-NEXT:    v_add_u32_e32 v1, 0x200, v0
 ; GFX9-MUBUF-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
-; GFX9-MUBUF-NEXT:    s_nop 0
+; GFX9-MUBUF-NEXT:    v_mov_b32_e32 v17, 0x3f20e7f4
+; GFX9-MUBUF-NEXT:    buffer_store_dword v18, off, s[0:3], 0 offset:796
+; GFX9-MUBUF-NEXT:    buffe...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/172084


More information about the llvm-commits mailing list