[llvm] d0c0838 - [AMDGPU] Remove return VGPRs from callee save list

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 14 22:07:12 PDT 2023


Author: Carl Ritson
Date: 2023-06-15T14:05:32+09:00
New Revision: d0c083870538a4155986fe29dc164e49bce37820

URL: https://github.com/llvm/llvm-project/commit/d0c083870538a4155986fe29dc164e49bce37820
DIFF: https://github.com/llvm/llvm-project/commit/d0c083870538a4155986fe29dc164e49bce37820.diff

LOG: [AMDGPU] Remove return VGPRs from callee save list

There is no need to generate spills/restores for registers used in
the return value. This matters for the amdgpu_gfx calling convention,
where the CSR and Ret definitions overlap.

Reviewed By: sebastian-ne

Differential Revision: https://reviews.llvm.org/D152892

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 7d97a07035234..6d9a2390d3d8f 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1501,6 +1501,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
+  MachineInstr *ReturnMI = nullptr;
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
       // WRITELANE instructions used for SGPR spills can overwrite the inactive
@@ -1517,6 +1518,23 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
         MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
       else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
         MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
+      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
+               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+        // We expect all return to be the same size.
+        assert(!ReturnMI ||
+               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
+                count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
+        ReturnMI = &MI;
+      }
+    }
+  }
+
+  // Remove any VGPRs used in the return value because these do not need to be saved.
+  // This prevents CSR restore from clobbering return VGPRs.
+  if (ReturnMI) {
+    for (auto &Op : ReturnMI->operands()) {
+      if (Op.isReg())
+        SavedVGPRs.reset(Op.getReg());
     }
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 0935d3c11085b..7d4a5d35fa687 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -450,102 +450,6 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX9-LABEL: return_100xi32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX9-NEXT:    buffer_store_dword v95, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT:    v_mov_b32_e32 v72, 0
-; GFX9-NEXT:    v_mov_b32_e32 v73, 0
-; GFX9-NEXT:    v_mov_b32_e32 v74, 0
-; GFX9-NEXT:    v_mov_b32_e32 v75, 0
-; GFX9-NEXT:    v_mov_b32_e32 v76, 0
-; GFX9-NEXT:    v_mov_b32_e32 v77, 0
-; GFX9-NEXT:    v_mov_b32_e32 v78, 0
-; GFX9-NEXT:    v_mov_b32_e32 v79, 0
-; GFX9-NEXT:    v_mov_b32_e32 v88, 0
-; GFX9-NEXT:    v_mov_b32_e32 v89, 0
-; GFX9-NEXT:    v_mov_b32_e32 v90, 0
-; GFX9-NEXT:    v_mov_b32_e32 v91, 0
-; GFX9-NEXT:    v_mov_b32_e32 v92, 0
-; GFX9-NEXT:    v_mov_b32_e32 v93, 0
-; GFX9-NEXT:    v_mov_b32_e32 v94, 0
-; GFX9-NEXT:    v_mov_b32_e32 v95, 0
-; GFX9-NEXT:    v_mov_b32_e32 v40, 0
-; GFX9-NEXT:    v_mov_b32_e32 v41, 0
-; GFX9-NEXT:    v_mov_b32_e32 v42, 0
-; GFX9-NEXT:    v_mov_b32_e32 v43, 0
-; GFX9-NEXT:    v_mov_b32_e32 v44, 0
-; GFX9-NEXT:    v_mov_b32_e32 v45, 0
-; GFX9-NEXT:    v_mov_b32_e32 v46, 0
-; GFX9-NEXT:    v_mov_b32_e32 v47, 0
-; GFX9-NEXT:    v_mov_b32_e32 v56, 0
-; GFX9-NEXT:    v_mov_b32_e32 v57, 0
-; GFX9-NEXT:    v_mov_b32_e32 v58, 0
-; GFX9-NEXT:    v_mov_b32_e32 v59, 0
-; GFX9-NEXT:    v_mov_b32_e32 v60, 0
-; GFX9-NEXT:    v_mov_b32_e32 v61, 0
-; GFX9-NEXT:    v_mov_b32_e32 v62, 0
-; GFX9-NEXT:    v_mov_b32_e32 v63, 0
-; GFX9-NEXT:    buffer_load_dword v95, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
 ; GFX9-NEXT:    v_mov_b32_e32 v99, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
@@ -583,6 +487,14 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX9-NEXT:    v_mov_b32_e32 v69, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v70, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v71, 0
+; GFX9-NEXT:    v_mov_b32_e32 v72, 0
+; GFX9-NEXT:    v_mov_b32_e32 v73, 0
+; GFX9-NEXT:    v_mov_b32_e32 v74, 0
+; GFX9-NEXT:    v_mov_b32_e32 v75, 0
+; GFX9-NEXT:    v_mov_b32_e32 v76, 0
+; GFX9-NEXT:    v_mov_b32_e32 v77, 0
+; GFX9-NEXT:    v_mov_b32_e32 v78, 0
+; GFX9-NEXT:    v_mov_b32_e32 v79, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v80, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v81, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v82, 0
@@ -591,6 +503,14 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX9-NEXT:    v_mov_b32_e32 v85, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v86, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v87, 0
+; GFX9-NEXT:    v_mov_b32_e32 v88, 0
+; GFX9-NEXT:    v_mov_b32_e32 v89, 0
+; GFX9-NEXT:    v_mov_b32_e32 v90, 0
+; GFX9-NEXT:    v_mov_b32_e32 v91, 0
+; GFX9-NEXT:    v_mov_b32_e32 v92, 0
+; GFX9-NEXT:    v_mov_b32_e32 v93, 0
+; GFX9-NEXT:    v_mov_b32_e32 v94, 0
+; GFX9-NEXT:    v_mov_b32_e32 v95, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v96, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v97, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v98, 0
@@ -606,6 +526,14 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX9-NEXT:    v_mov_b32_e32 v37, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v38, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v39, 0
+; GFX9-NEXT:    v_mov_b32_e32 v40, 0
+; GFX9-NEXT:    v_mov_b32_e32 v41, 0
+; GFX9-NEXT:    v_mov_b32_e32 v42, 0
+; GFX9-NEXT:    v_mov_b32_e32 v43, 0
+; GFX9-NEXT:    v_mov_b32_e32 v44, 0
+; GFX9-NEXT:    v_mov_b32_e32 v45, 0
+; GFX9-NEXT:    v_mov_b32_e32 v46, 0
+; GFX9-NEXT:    v_mov_b32_e32 v47, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v48, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v49, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v50, 0
@@ -614,110 +542,20 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX9-NEXT:    v_mov_b32_e32 v53, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v54, 0
 ; GFX9-NEXT:    v_mov_b32_e32 v55, 0
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v56, 0
+; GFX9-NEXT:    v_mov_b32_e32 v57, 0
+; GFX9-NEXT:    v_mov_b32_e32 v58, 0
+; GFX9-NEXT:    v_mov_b32_e32 v59, 0
+; GFX9-NEXT:    v_mov_b32_e32 v60, 0
+; GFX9-NEXT:    v_mov_b32_e32 v61, 0
+; GFX9-NEXT:    v_mov_b32_e32 v62, 0
+; GFX9-NEXT:    v_mov_b32_e32 v63, 0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: return_100xi32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GFX10-NEXT:    buffer_store_dword v95, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX10-NEXT:    v_mov_b32_e32 v40, 0
-; GFX10-NEXT:    v_mov_b32_e32 v41, 0
-; GFX10-NEXT:    v_mov_b32_e32 v42, 0
-; GFX10-NEXT:    v_mov_b32_e32 v43, 0
-; GFX10-NEXT:    v_mov_b32_e32 v44, 0
-; GFX10-NEXT:    v_mov_b32_e32 v45, 0
-; GFX10-NEXT:    v_mov_b32_e32 v46, 0
-; GFX10-NEXT:    v_mov_b32_e32 v47, 0
-; GFX10-NEXT:    v_mov_b32_e32 v56, 0
-; GFX10-NEXT:    v_mov_b32_e32 v57, 0
-; GFX10-NEXT:    v_mov_b32_e32 v58, 0
-; GFX10-NEXT:    v_mov_b32_e32 v59, 0
-; GFX10-NEXT:    v_mov_b32_e32 v60, 0
-; GFX10-NEXT:    v_mov_b32_e32 v61, 0
-; GFX10-NEXT:    v_mov_b32_e32 v62, 0
-; GFX10-NEXT:    v_mov_b32_e32 v63, 0
-; GFX10-NEXT:    v_mov_b32_e32 v72, 0
-; GFX10-NEXT:    v_mov_b32_e32 v73, 0
-; GFX10-NEXT:    v_mov_b32_e32 v74, 0
-; GFX10-NEXT:    v_mov_b32_e32 v75, 0
-; GFX10-NEXT:    v_mov_b32_e32 v76, 0
-; GFX10-NEXT:    v_mov_b32_e32 v77, 0
-; GFX10-NEXT:    v_mov_b32_e32 v78, 0
-; GFX10-NEXT:    v_mov_b32_e32 v79, 0
-; GFX10-NEXT:    v_mov_b32_e32 v88, 0
-; GFX10-NEXT:    v_mov_b32_e32 v89, 0
-; GFX10-NEXT:    v_mov_b32_e32 v90, 0
-; GFX10-NEXT:    v_mov_b32_e32 v91, 0
-; GFX10-NEXT:    v_mov_b32_e32 v92, 0
-; GFX10-NEXT:    v_mov_b32_e32 v93, 0
-; GFX10-NEXT:    v_mov_b32_e32 v94, 0
-; GFX10-NEXT:    v_mov_b32_e32 v95, 0
-; GFX10-NEXT:    s_clause 0x1f
-; GFX10-NEXT:    buffer_load_dword v95, off, s[0:3], s32
-; GFX10-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:4
-; GFX10-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:8
-; GFX10-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:12
-; GFX10-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:16
-; GFX10-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:20
-; GFX10-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:24
-; GFX10-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:28
-; GFX10-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:32
-; GFX10-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:36
-; GFX10-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:40
-; GFX10-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:44
-; GFX10-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:48
-; GFX10-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:52
-; GFX10-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:56
-; GFX10-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:60
-; GFX10-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:64
-; GFX10-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:68
-; GFX10-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:72
-; GFX10-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:76
-; GFX10-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:80
-; GFX10-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:84
-; GFX10-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:88
-; GFX10-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:92
-; GFX10-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:96
-; GFX10-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:100
-; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:104
-; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:108
-; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:112
-; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:116
-; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:120
-; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:124
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
@@ -758,6 +596,14 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX10-NEXT:    v_mov_b32_e32 v37, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v38, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v39, 0
+; GFX10-NEXT:    v_mov_b32_e32 v40, 0
+; GFX10-NEXT:    v_mov_b32_e32 v41, 0
+; GFX10-NEXT:    v_mov_b32_e32 v42, 0
+; GFX10-NEXT:    v_mov_b32_e32 v43, 0
+; GFX10-NEXT:    v_mov_b32_e32 v44, 0
+; GFX10-NEXT:    v_mov_b32_e32 v45, 0
+; GFX10-NEXT:    v_mov_b32_e32 v46, 0
+; GFX10-NEXT:    v_mov_b32_e32 v47, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v48, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v49, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v50, 0
@@ -766,6 +612,14 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX10-NEXT:    v_mov_b32_e32 v53, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v54, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v55, 0
+; GFX10-NEXT:    v_mov_b32_e32 v56, 0
+; GFX10-NEXT:    v_mov_b32_e32 v57, 0
+; GFX10-NEXT:    v_mov_b32_e32 v58, 0
+; GFX10-NEXT:    v_mov_b32_e32 v59, 0
+; GFX10-NEXT:    v_mov_b32_e32 v60, 0
+; GFX10-NEXT:    v_mov_b32_e32 v61, 0
+; GFX10-NEXT:    v_mov_b32_e32 v62, 0
+; GFX10-NEXT:    v_mov_b32_e32 v63, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v64, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v65, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v66, 0
@@ -774,6 +628,14 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX10-NEXT:    v_mov_b32_e32 v69, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v70, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v71, 0
+; GFX10-NEXT:    v_mov_b32_e32 v72, 0
+; GFX10-NEXT:    v_mov_b32_e32 v73, 0
+; GFX10-NEXT:    v_mov_b32_e32 v74, 0
+; GFX10-NEXT:    v_mov_b32_e32 v75, 0
+; GFX10-NEXT:    v_mov_b32_e32 v76, 0
+; GFX10-NEXT:    v_mov_b32_e32 v77, 0
+; GFX10-NEXT:    v_mov_b32_e32 v78, 0
+; GFX10-NEXT:    v_mov_b32_e32 v79, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v80, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v81, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v82, 0
@@ -782,100 +644,24 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX10-NEXT:    v_mov_b32_e32 v85, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v86, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v87, 0
+; GFX10-NEXT:    v_mov_b32_e32 v88, 0
+; GFX10-NEXT:    v_mov_b32_e32 v89, 0
+; GFX10-NEXT:    v_mov_b32_e32 v90, 0
+; GFX10-NEXT:    v_mov_b32_e32 v91, 0
+; GFX10-NEXT:    v_mov_b32_e32 v92, 0
+; GFX10-NEXT:    v_mov_b32_e32 v93, 0
+; GFX10-NEXT:    v_mov_b32_e32 v94, 0
+; GFX10-NEXT:    v_mov_b32_e32 v95, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v96, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v97, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v98, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v99, 0
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: return_100xi32:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    s_clause 0x1f
-; GFX11-NEXT:    scratch_store_b32 off, v40, s32 offset:124
-; GFX11-NEXT:    scratch_store_b32 off, v41, s32 offset:120
-; GFX11-NEXT:    scratch_store_b32 off, v42, s32 offset:116
-; GFX11-NEXT:    scratch_store_b32 off, v43, s32 offset:112
-; GFX11-NEXT:    scratch_store_b32 off, v44, s32 offset:108
-; GFX11-NEXT:    scratch_store_b32 off, v45, s32 offset:104
-; GFX11-NEXT:    scratch_store_b32 off, v46, s32 offset:100
-; GFX11-NEXT:    scratch_store_b32 off, v47, s32 offset:96
-; GFX11-NEXT:    scratch_store_b32 off, v56, s32 offset:92
-; GFX11-NEXT:    scratch_store_b32 off, v57, s32 offset:88
-; GFX11-NEXT:    scratch_store_b32 off, v58, s32 offset:84
-; GFX11-NEXT:    scratch_store_b32 off, v59, s32 offset:80
-; GFX11-NEXT:    scratch_store_b32 off, v60, s32 offset:76
-; GFX11-NEXT:    scratch_store_b32 off, v61, s32 offset:72
-; GFX11-NEXT:    scratch_store_b32 off, v62, s32 offset:68
-; GFX11-NEXT:    scratch_store_b32 off, v63, s32 offset:64
-; GFX11-NEXT:    scratch_store_b32 off, v72, s32 offset:60
-; GFX11-NEXT:    scratch_store_b32 off, v73, s32 offset:56
-; GFX11-NEXT:    scratch_store_b32 off, v74, s32 offset:52
-; GFX11-NEXT:    scratch_store_b32 off, v75, s32 offset:48
-; GFX11-NEXT:    scratch_store_b32 off, v76, s32 offset:44
-; GFX11-NEXT:    scratch_store_b32 off, v77, s32 offset:40
-; GFX11-NEXT:    scratch_store_b32 off, v78, s32 offset:36
-; GFX11-NEXT:    scratch_store_b32 off, v79, s32 offset:32
-; GFX11-NEXT:    scratch_store_b32 off, v88, s32 offset:28
-; GFX11-NEXT:    scratch_store_b32 off, v89, s32 offset:24
-; GFX11-NEXT:    scratch_store_b32 off, v90, s32 offset:20
-; GFX11-NEXT:    scratch_store_b32 off, v91, s32 offset:16
-; GFX11-NEXT:    scratch_store_b32 off, v92, s32 offset:12
-; GFX11-NEXT:    scratch_store_b32 off, v93, s32 offset:8
-; GFX11-NEXT:    scratch_store_b32 off, v94, s32 offset:4
-; GFX11-NEXT:    scratch_store_b32 off, v95, s32
-; GFX11-NEXT:    v_dual_mov_b32 v40, 0 :: v_dual_mov_b32 v41, 0
-; GFX11-NEXT:    v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v43, 0
-; GFX11-NEXT:    v_dual_mov_b32 v44, 0 :: v_dual_mov_b32 v45, 0
-; GFX11-NEXT:    v_dual_mov_b32 v46, 0 :: v_dual_mov_b32 v47, 0
-; GFX11-NEXT:    v_dual_mov_b32 v56, 0 :: v_dual_mov_b32 v57, 0
-; GFX11-NEXT:    v_dual_mov_b32 v58, 0 :: v_dual_mov_b32 v59, 0
-; GFX11-NEXT:    v_dual_mov_b32 v60, 0 :: v_dual_mov_b32 v61, 0
-; GFX11-NEXT:    v_dual_mov_b32 v62, 0 :: v_dual_mov_b32 v63, 0
-; GFX11-NEXT:    v_dual_mov_b32 v72, 0 :: v_dual_mov_b32 v73, 0
-; GFX11-NEXT:    v_dual_mov_b32 v74, 0 :: v_dual_mov_b32 v75, 0
-; GFX11-NEXT:    v_dual_mov_b32 v76, 0 :: v_dual_mov_b32 v77, 0
-; GFX11-NEXT:    v_dual_mov_b32 v78, 0 :: v_dual_mov_b32 v79, 0
-; GFX11-NEXT:    v_dual_mov_b32 v88, 0 :: v_dual_mov_b32 v89, 0
-; GFX11-NEXT:    v_dual_mov_b32 v90, 0 :: v_dual_mov_b32 v91, 0
-; GFX11-NEXT:    v_dual_mov_b32 v92, 0 :: v_dual_mov_b32 v93, 0
-; GFX11-NEXT:    v_dual_mov_b32 v94, 0 :: v_dual_mov_b32 v95, 0
-; GFX11-NEXT:    s_clause 0x1f
-; GFX11-NEXT:    scratch_load_b32 v95, off, s32
-; GFX11-NEXT:    scratch_load_b32 v94, off, s32 offset:4
-; GFX11-NEXT:    scratch_load_b32 v93, off, s32 offset:8
-; GFX11-NEXT:    scratch_load_b32 v92, off, s32 offset:12
-; GFX11-NEXT:    scratch_load_b32 v91, off, s32 offset:16
-; GFX11-NEXT:    scratch_load_b32 v90, off, s32 offset:20
-; GFX11-NEXT:    scratch_load_b32 v89, off, s32 offset:24
-; GFX11-NEXT:    scratch_load_b32 v88, off, s32 offset:28
-; GFX11-NEXT:    scratch_load_b32 v79, off, s32 offset:32
-; GFX11-NEXT:    scratch_load_b32 v78, off, s32 offset:36
-; GFX11-NEXT:    scratch_load_b32 v77, off, s32 offset:40
-; GFX11-NEXT:    scratch_load_b32 v76, off, s32 offset:44
-; GFX11-NEXT:    scratch_load_b32 v75, off, s32 offset:48
-; GFX11-NEXT:    scratch_load_b32 v74, off, s32 offset:52
-; GFX11-NEXT:    scratch_load_b32 v73, off, s32 offset:56
-; GFX11-NEXT:    scratch_load_b32 v72, off, s32 offset:60
-; GFX11-NEXT:    scratch_load_b32 v63, off, s32 offset:64
-; GFX11-NEXT:    scratch_load_b32 v62, off, s32 offset:68
-; GFX11-NEXT:    scratch_load_b32 v61, off, s32 offset:72
-; GFX11-NEXT:    scratch_load_b32 v60, off, s32 offset:76
-; GFX11-NEXT:    scratch_load_b32 v59, off, s32 offset:80
-; GFX11-NEXT:    scratch_load_b32 v58, off, s32 offset:84
-; GFX11-NEXT:    scratch_load_b32 v57, off, s32 offset:88
-; GFX11-NEXT:    scratch_load_b32 v56, off, s32 offset:92
-; GFX11-NEXT:    scratch_load_b32 v47, off, s32 offset:96
-; GFX11-NEXT:    scratch_load_b32 v46, off, s32 offset:100
-; GFX11-NEXT:    scratch_load_b32 v45, off, s32 offset:104
-; GFX11-NEXT:    scratch_load_b32 v44, off, s32 offset:108
-; GFX11-NEXT:    scratch_load_b32 v43, off, s32 offset:112
-; GFX11-NEXT:    scratch_load_b32 v42, off, s32 offset:116
-; GFX11-NEXT:    scratch_load_b32 v41, off, s32 offset:120
-; GFX11-NEXT:    scratch_load_b32 v40, off, s32 offset:124
 ; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0
@@ -896,22 +682,36 @@ define amdgpu_gfx <100 x i32> @return_100xi32() #0 {
 ; GFX11-NEXT:    v_dual_mov_b32 v34, 0 :: v_dual_mov_b32 v35, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v36, 0 :: v_dual_mov_b32 v37, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v38, 0 :: v_dual_mov_b32 v39, 0
+; GFX11-NEXT:    v_dual_mov_b32 v40, 0 :: v_dual_mov_b32 v41, 0
+; GFX11-NEXT:    v_dual_mov_b32 v42, 0 :: v_dual_mov_b32 v43, 0
+; GFX11-NEXT:    v_dual_mov_b32 v44, 0 :: v_dual_mov_b32 v45, 0
+; GFX11-NEXT:    v_dual_mov_b32 v46, 0 :: v_dual_mov_b32 v47, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v48, 0 :: v_dual_mov_b32 v49, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v50, 0 :: v_dual_mov_b32 v51, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v52, 0 :: v_dual_mov_b32 v53, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v54, 0 :: v_dual_mov_b32 v55, 0
+; GFX11-NEXT:    v_dual_mov_b32 v56, 0 :: v_dual_mov_b32 v57, 0
+; GFX11-NEXT:    v_dual_mov_b32 v58, 0 :: v_dual_mov_b32 v59, 0
+; GFX11-NEXT:    v_dual_mov_b32 v60, 0 :: v_dual_mov_b32 v61, 0
+; GFX11-NEXT:    v_dual_mov_b32 v62, 0 :: v_dual_mov_b32 v63, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v64, 0 :: v_dual_mov_b32 v65, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v66, 0 :: v_dual_mov_b32 v67, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v68, 0 :: v_dual_mov_b32 v69, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v70, 0 :: v_dual_mov_b32 v71, 0
+; GFX11-NEXT:    v_dual_mov_b32 v72, 0 :: v_dual_mov_b32 v73, 0
+; GFX11-NEXT:    v_dual_mov_b32 v74, 0 :: v_dual_mov_b32 v75, 0
+; GFX11-NEXT:    v_dual_mov_b32 v76, 0 :: v_dual_mov_b32 v77, 0
+; GFX11-NEXT:    v_dual_mov_b32 v78, 0 :: v_dual_mov_b32 v79, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v80, 0 :: v_dual_mov_b32 v81, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v82, 0 :: v_dual_mov_b32 v83, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v84, 0 :: v_dual_mov_b32 v85, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v86, 0 :: v_dual_mov_b32 v87, 0
+; GFX11-NEXT:    v_dual_mov_b32 v88, 0 :: v_dual_mov_b32 v89, 0
+; GFX11-NEXT:    v_dual_mov_b32 v90, 0 :: v_dual_mov_b32 v91, 0
+; GFX11-NEXT:    v_dual_mov_b32 v92, 0 :: v_dual_mov_b32 v93, 0
+; GFX11-NEXT:    v_dual_mov_b32 v94, 0 :: v_dual_mov_b32 v95, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v96, 0 :: v_dual_mov_b32 v97, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v98, 0 :: v_dual_mov_b32 v99, 0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   ret <100 x i32> zeroinitializer
 }


        


More information about the llvm-commits mailing list