[llvm] r371149 - AMDGPU: Allow getMemOperandWithOffset to analyze stack accesses

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 5 16:54:35 PDT 2019


Author: arsenm
Date: Thu Sep  5 16:54:35 2019
New Revision: 371149

URL: http://llvm.org/viewvc/llvm-project?rev=371149&view=rev
Log:
AMDGPU: Allow getMemOperandWithOffset to analyze stack accesses

Report soffset as a base register if the scratch resource can be
ignored.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
    llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
    llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
    llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
    llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=371149&r1=371148&r2=371149&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Thu Sep  5 16:54:35 2019
@@ -318,8 +318,25 @@ bool SIInstrInfo::getMemOperandWithOffse
 
   if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
     const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
-    if (SOffset && SOffset->isReg())
-      return false;
+    if (SOffset && SOffset->isReg()) {
+      // We can only handle this if it's a stack access, as any other resource
+      // would require reporting multiple base registers.
+      const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
+      if (AddrReg && !AddrReg->isFI())
+        return false;
+
+      const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
+      const SIMachineFunctionInfo *MFI
+        = LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+      if (RSrc->getReg() != MFI->getScratchRSrcReg())
+        return false;
+
+      const MachineOperand *OffsetImm =
+        getNamedOperand(LdSt, AMDGPU::OpName::offset);
+      BaseOp = SOffset;
+      Offset = OffsetImm->getImm();
+      return true;
+    }
 
     const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
     if (!AddrReg)

Modified: llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll?rev=371149&r1=371148&r2=371149&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll Thu Sep  5 16:54:35 2019
@@ -133,10 +133,10 @@ entry:
 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
 
-; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
+; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
 
 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
@@ -263,10 +263,10 @@ entry:
 
 
 ; GCN-NOT: s_add_u32 s32, s32, 0x800
+; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
 
 ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
 ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
-; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
 
@@ -331,10 +331,11 @@ entry:
 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
 ; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
 
-; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
 ; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s34 offset:20
 ; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s34 offset:24
 ; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s34 offset:28
+; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s34 offset:16
+
 ; GCN: s_waitcnt vmcnt(0)
 ; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
 ; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20

Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=371149&r1=371148&r2=371149&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Thu Sep  5 16:54:35 2019
@@ -765,17 +765,16 @@ entry:
 
 ; GCN-LABEL: {{^}}tail_call_byval_align16:
 ; GCN-NOT: s32
-; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16
-; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20
+; GCN-NOT: buffer_store_dword v33
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-NOT: buffer_store_dword v33
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:20
 
 ; GCN: s_getpc_b64
 
-; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4
-; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
-; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GCN-NOT: s32
 ; GCN: s_setpc_b64
 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
@@ -806,12 +805,12 @@ entry:
 
 ; GCN-LABEL: {{^}}stack_12xv3i32:
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
-; GCN: buffer_store_dword [[REG12]], {{.*$}}
 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
+; GCN: buffer_store_dword [[REG12]], {{.*$}}
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
 ; GCN: v_mov_b32_e32 v31, 11
 ; GCN: s_getpc
@@ -835,12 +834,12 @@ entry:
 
 ; GCN-LABEL: {{^}}stack_12xv3f32:
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
-; GCN: buffer_store_dword [[REG12]], {{.*$}}
 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
+; GCN: buffer_store_dword [[REG12]], {{.*$}}
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:4
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:8
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:12
 ; GCN: v_mov_b32_e32 v31, 0x41300000
 ; GCN: s_getpc
@@ -865,20 +864,20 @@ entry:
 ; GCN-LABEL: {{^}}stack_8xv5i32:
 
 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
-; GCN: buffer_store_dword [[REG8]], {{.*$}}
 ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
-; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
-; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
-; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
+; GCN: buffer_store_dword [[REG8]], {{.*$}}
+; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
+; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
+; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
 
 ; GCN: v_mov_b32_e32 v31, 7
@@ -899,20 +898,20 @@ entry:
 
 ; GCN-LABEL: {{^}}stack_8xv5f32:
 ; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000
-; GCN: buffer_store_dword [[REG8]], {{.*$}}
 ; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000
-; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
 ; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000
-; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
 ; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
-; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
 ; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
+; GCN: buffer_store_dword [[REG8]], {{.*$}}
+; GCN: buffer_store_dword [[REG9]], {{.*}} offset:4
+; GCN: buffer_store_dword [[REG10]], {{.*}} offset:8
+; GCN: buffer_store_dword [[REG11]], {{.*}} offset:12
 ; GCN: buffer_store_dword [[REG12]], {{.*}} offset:16
 ; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
-; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
 ; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
-; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
 ; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000
+; GCN: buffer_store_dword [[REG13]], {{.*}} offset:20
+; GCN: buffer_store_dword [[REG14]], {{.*}} offset:24
 ; GCN: buffer_store_dword [[REG15]], {{.*}} offset:28
 
 ; GCN: v_mov_b32_e32 v31, 0x40e00000

Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll?rev=371149&r1=371148&r2=371149&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll Thu Sep  5 16:54:35 2019
@@ -489,16 +489,15 @@ define void @too_many_args_use_workitem_
 
 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
 ; GCN: enable_vgpr_workitem_id = 0
-
-; GCN: s_mov_b32 s33, s7
+; GCN-DAG: s_mov_b32 s33, s7
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
+; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
+; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
 ; GCN: s_add_u32 s32, s33, 0x400{{$}}
 
 ; GCN-NOT: s32
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
 
-; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
 ; GCN: s_swappc_b64
@@ -521,9 +520,8 @@ define amdgpu_kernel void @kern_call_too
 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
 ; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}}
-; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
-
 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}}
+; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4
 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
 ; GCN: s_swappc_b64

Modified: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll?rev=371149&r1=371148&r2=371149&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll Thu Sep  5 16:54:35 2019
@@ -231,10 +231,10 @@ declare void @func(<4 x float> addrspace
 
 ; GCN-LABEL: {{^}}undefined_stack_store_reg:
 ; GCN: s_and_saveexec_b64
+; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:
 ; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
 ; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
 ; GCN: buffer_store_dword v0, off, s[0:3], s34 offset:
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:
 define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 {
 bb:
   %tmp = alloca <4 x float>, align 16, addrspace(5)

Modified: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll?rev=371149&r1=371148&r2=371149&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.ll Thu Sep  5 16:54:35 2019
@@ -1650,10 +1650,10 @@ define amdgpu_kernel void @dynamic_inser
 ; SI-NEXT:    v_mov_b32_e32 v13, s21
 ; SI-NEXT:    v_mov_b32_e32 v14, s22
 ; SI-NEXT:    v_mov_b32_e32 v15, s23
-; SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
 ; SI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96
-; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80
+; SI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64
+; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80
 ; SI-NEXT:    v_or_b32_e32 v16, s4, v16
 ; SI-NEXT:    v_mov_b32_e32 v0, 0
 ; SI-NEXT:    v_mov_b32_e32 v1, 0x40200000
@@ -1696,10 +1696,10 @@ define amdgpu_kernel void @dynamic_inser
 ; VI-NEXT:    v_mov_b32_e32 v13, s21
 ; VI-NEXT:    v_mov_b32_e32 v14, s22
 ; VI-NEXT:    v_mov_b32_e32 v15, s23
-; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
 ; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], s7 offset:96
-; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80
+; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[0:3], s7 offset:112
 ; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], s7 offset:64
+; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], s7 offset:80
 ; VI-NEXT:    v_or_b32_e32 v16, s4, v16
 ; VI-NEXT:    v_mov_b32_e32 v0, 0
 ; VI-NEXT:    v_mov_b32_e32 v1, 0x40200000




More information about the llvm-commits mailing list