[llvm] adac04f - [AMDGPU] Fix using wrong register in frame index shift (#101649)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 04:47:35 PDT 2024


Author: Pankaj Dwivedi
Date: 2024-08-02T17:17:32+05:30
New Revision: adac04ffc90adebbaa0661d642f482dab8440df5

URL: https://github.com/llvm/llvm-project/commit/adac04ffc90adebbaa0661d642f482dab8440df5
DIFF: https://github.com/llvm/llvm-project/commit/adac04ffc90adebbaa0661d642f482dab8440df5.diff

LOG: [AMDGPU] Fix using wrong register in frame index shift (#101649)

In the v_mad case, the offset is materialized in a VGPR and the mad is
performed in wave space; afterwards, the VGPR has to be shifted back into
lane space. [#99556](https://github.com/llvm/llvm-project/pull/99556)
introduced a bug in which that shift read the frame register instead of
the VGPR holding the mad result.

Co-authored-by: Pankajdwivedi-25 <pankajkumar.divedi at amd.com>

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
    llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
    llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index e824e95610a65..ba49f4a309ebb 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2591,7 +2591,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
                         TmpResultReg)
                     .addImm(ST.getWavefrontSizeLog2())
-                    .addReg(FrameReg);
+                    .addReg(TmpResultReg);
               }
 
               Register NewDest = IsCopy ? ResultReg

diff  --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
index 78fb25a76d25e..08c2904c601ad 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
@@ -708,7 +708,7 @@ body:   |
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
     ; GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 64, implicit $exec
     ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 64, $sgpr32, 0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@@ -903,7 +903,7 @@ body:             |
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
     ; GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 68, implicit $exec
     ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 64, $sgpr32, 0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15

diff  --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 9cd92dcd5c94d..87cfaec208897 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -77,7 +77,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], s32
 ; GFX7-NEXT:    v_mov_b32_e32 v0, 0x4040
 ; GFX7-NEXT:    v_mad_u32_u24 v0, v0, 64, s32
-; GFX7-NEXT:    v_lshr_b32_e64 v0, s32, 6
+; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 6, v0
 ; GFX7-NEXT:    v_writelane_b32 v23, s59, 28
 ; GFX7-NEXT:    v_readfirstlane_b32 s59, v0
 ; GFX7-NEXT:    buffer_load_dword v0, off, s[0:3], s32
@@ -168,7 +168,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
 ; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], s32
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0x4040
 ; GFX8-NEXT:    v_mad_u32_u24 v0, v0, 64, s32
-; GFX8-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 6, v0
 ; GFX8-NEXT:    v_writelane_b32 v23, s59, 28
 ; GFX8-NEXT:    v_readfirstlane_b32 s59, v0
 ; GFX8-NEXT:    buffer_load_dword v0, off, s[0:3], s32
@@ -841,7 +841,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
 ; GFX7-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
 ; GFX7-NEXT:    ;;#ASMEND
 ; GFX7-NEXT:    v_mad_u32_u24 v22, v22, 64, s32
-; GFX7-NEXT:    v_lshr_b32_e64 v22, s32, 6
+; GFX7-NEXT:    v_lshrrev_b32_e32 v22, 6, v22
 ; GFX7-NEXT:    v_writelane_b32 v21, s59, 28
 ; GFX7-NEXT:    v_readfirstlane_b32 s59, v22
 ; GFX7-NEXT:    ;;#ASMSTART
@@ -924,7 +924,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
 ; GFX8-NEXT:    ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_mad_u32_u24 v22, v22, 64, s32
-; GFX8-NEXT:    v_lshrrev_b32_e64 v22, 6, s32
+; GFX8-NEXT:    v_lshrrev_b32_e32 v22, 6, v22
 ; GFX8-NEXT:    v_writelane_b32 v21, s59, 28
 ; GFX8-NEXT:    v_readfirstlane_b32 s59, v22
 ; GFX8-NEXT:    ;;#ASMSTART


        


More information about the llvm-commits mailing list