[llvm] r307576 - AMDGPU: Allow SIShrinkInstructions to fold FrameIndexes

Mon Jul 10 13:04:35 PDT 2017

Author: arsenm
Date: Mon Jul 10 13:04:35 2017
New Revision: 307576

URL: http://llvm.org/viewvc/llvm-project?rev=307576&view=rev
Log:
AMDGPU: Allow SIShrinkInstructions to fold FrameIndexes

Added:
    llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp?rev=307576&r1=307575&r2=307576&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIShrinkInstructions.cpp Mon Jul 10 13:04:35 2017
@@ -150,6 +150,10 @@ static bool foldImmediates(MachineInstr
           Src0.setSubReg(0);
           Src0.ChangeToImmediate(MovSrc.getImm());
           ConstantFolded = true;
+        } else if (MovSrc.isFI()) {
+          Src0.setSubReg(0);
+          Src0.ChangeToFrameIndex(MovSrc.getIndex());
+          ConstantFolded = true;
         }
 
         if (ConstantFolded) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll?rev=307576&r1=307575&r2=307576&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/scratch-simple.ll Mon Jul 10 13:04:35 2017
@@ -12,10 +12,8 @@
 ; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
 ; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]]
 
-; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200
-; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400
-; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[C200]]
-; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[C400]]
+; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], 0x200, [[CLAMP_IDX]]
+; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], 0x400, [[CLAMP_IDX]]
 
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen
 ; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen

Added: llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir?rev=307576&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir Mon Jul 10 13:04:35 2017
@@ -0,0 +1,161 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
+--- |
+
+  define amdgpu_kernel void @fold_fi_vgpr() {
+    %alloca = alloca [4 x i32]
+    ret void
+  }
+
+  define amdgpu_kernel void @fold_vgpr_fi() {
+    %alloca = alloca [4 x i32]
+    ret void
+  }
+
+  define amdgpu_kernel void @fold_sgpr_fi() {
+    %alloca = alloca [4 x i32]
+    ret void
+  }
+
+  define amdgpu_kernel void @fold_fi_sgpr() {
+    %alloca = alloca [4 x i32]
+    ret void
+  }
+
+  define amdgpu_kernel void @fold_fi_imm() {
+    %alloca = alloca [4 x i32]
+    ret void
+  }
+
+  define amdgpu_kernel void @fold_imm_fi() {
+    %alloca = alloca [4 x i32]
+    ret void
+  }
+
+...
+# GCN-LABEL: name: fold_fi_vgpr{{$}}
+# GCN: %1 = IMPLICIT_DEF
+
+# GCN: %2 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
+name: fold_fi_vgpr
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+stack:
+  - { id: 0, name: alloca, type: default, offset: 0, size: 128, alignment: 8,
+      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '',
+      di-location: '' }
+body:             |
+  bb.0:
+    %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+    %1 = IMPLICIT_DEF
+    %2, %vcc = V_ADD_I32_e64 %0, %1, implicit %exec
+    S_ENDPGM
+
+...
+# GCN-LABEL: name: fold_vgpr_fi{{$}}
+# GCN: %1 = IMPLICIT_DEF
+# GCN: %2 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
+name: fold_vgpr_fi
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+stack:
+  - { id: 0, name: alloca, type: default, offset: 0, size: 128, alignment: 8,
+      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '',
+      di-location: '' }
+body:             |
+  bb.0:
+    %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+    %1 = IMPLICIT_DEF
+    %2, %vcc = V_ADD_I32_e64 %1, %0, implicit %exec
+    S_ENDPGM
+
+...
+# GCN-LABEL: name: fold_sgpr_fi{{$}}
+# GCN: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+# GCN: %1 = IMPLICIT_DEF
+# GCN: %2 = V_ADD_I32_e32 %1, %0, implicit-def %vcc, implicit %exec
+name: fold_sgpr_fi
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: sgpr_32 }
+  - { id: 2, class: vgpr_32 }
+stack:
+  - { id: 0, name: alloca, type: default, offset: 0, size: 128, alignment: 8,
+      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '',
+      di-location: '' }
+body:             |
+  bb.0:
+    %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+    %1 = IMPLICIT_DEF
+    %2, %vcc = V_ADD_I32_e64 %1, %0, implicit %exec
+    S_ENDPGM
+
+...
+# GCN-LABEL: name: fold_fi_sgpr{{$}}
+# GCN: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+# GCN: %1 = IMPLICIT_DEF
+# GCN: %2 = V_ADD_I32_e32 %1, %0, implicit-def %vcc, implicit %exec
+name: fold_fi_sgpr
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: sgpr_32 }
+  - { id: 2, class: vgpr_32 }
+stack:
+  - { id: 0, name: alloca, type: default, offset: 0, size: 128, alignment: 8,
+      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '',
+      di-location: '' }
+body:             |
+  bb.0:
+    %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+    %1 = IMPLICIT_DEF
+    %2, %vcc = V_ADD_I32_e64 %0, %1, implicit %exec
+    S_ENDPGM
+...
+# TODO: When both operands are foldable, should probably prefer folding the immediate rather than the frame index
+# GCN-LABEL: name: fold_fi_imm{{$}}
+# GCN: %1 = V_MOV_B32_e32 999, implicit %exec
+# GCN: %2 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
+name: fold_fi_imm
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+stack:
+  - { id: 0, name: alloca, type: default, offset: 0, size: 128, alignment: 8,
+      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '',
+      di-location: '' }
+body:             |
+  bb.0:
+    %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+    %1 = V_MOV_B32_e32 999, implicit %exec
+    %2, %vcc = V_ADD_I32_e64 %0, %1, implicit %exec
+    S_ENDPGM
+
+...
+# GCN-LABEL: name: fold_imm_fi{{$}}
+# GCN: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+# GCN: %2 = V_ADD_I32_e32 999, %0, implicit-def %vcc, implicit %exec
+name: fold_imm_fi
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+stack:
+  - { id: 0, name: alloca, type: default, offset: 0, size: 128, alignment: 8,
+      callee-saved-register: '', local-offset: 0, di-variable: '', di-expression: '',
+      di-location: '' }
+body:             |
+  bb.0:
+    %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+    %1 = V_MOV_B32_e32 999, implicit %exec
+    %2, %vcc = V_ADD_I32_e64 %1, %0, implicit %exec
+    S_ENDPGM