[llvm] r366249 - AMDGPU/GlobalISel: Fix selection of private stores
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 12:27:44 PDT 2019
Author: arsenm
Date: Tue Jul 16 12:27:44 2019
New Revision: 366249
URL: http://llvm.org/viewvc/llvm-project?rev=366249&view=rev
Log:
AMDGPU/GlobalISel: Fix selection of private stores
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
Modified: llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td?rev=366249&r1=366248&r2=366249&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/BUFInstructions.td Tue Jul 16 12:27:44 2019
@@ -1568,17 +1568,18 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_
multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
MUBUF_Pseudo InstrOffset,
- ValueType vt, PatFrag st> {
+ ValueType vt, PatFrag st,
+ RegisterClass rc = VGPR_32> {
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset)),
- (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+ (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
u16imm:$offset)),
- (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
+ (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
>;
}
@@ -1587,9 +1588,9 @@ defm : MUBUFScratchStorePat <BUFFER_STOR
defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private>;
-defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private, VReg_64>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OFFSET, v3i32, store_private, VReg_96>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>;
let OtherPredicates = [D16PreservesUnusedBits] in {
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir?rev=366249&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir Tue Jul 16 12:27:44 2019
@@ -0,0 +1,280 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+---
+
+name: store_private_s32_to_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: store_private_s32_to_4
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ ; GFX9-LABEL: name: store_private_s32_to_4
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(p5) = COPY $vgpr1
+ G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: store_private_s32_to_2
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+ ; GFX9-LABEL: name: store_private_s32_to_2
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(p5) = COPY $vgpr1
+ G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: store_private_s32_to_1
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9-LABEL: name: store_private_s32_to_1
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:vgpr(p5) = COPY $vgpr1
+ G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
+
+...
+
+---
+
+name: store_private_v2s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: store_private_v2s16
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+ ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5)
+ ; GFX9-LABEL: name: store_private_v2s16
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+ ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5)
+ %0:vgpr(<2 x s16>) = COPY $vgpr0
+ %1:vgpr(p5) = COPY $vgpr1
+ G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_p3
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: store_private_p3
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+ ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5)
+ ; GFX9-LABEL: name: store_private_p3
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+ ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p5) = COPY $vgpr1
+ G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_p5
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ ; GFX6-LABEL: name: store_private_p5
+ ; GFX6: liveins: $vgpr0, $vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+ ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5)
+ ; GFX9-LABEL: name: store_private_p5
+ ; GFX9: liveins: $vgpr0, $vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1
+ ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5)
+ %0:vgpr(p5) = COPY $vgpr0
+ %1:vgpr(p5) = COPY $vgpr1
+ G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1_fi_offset_4095
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+ bb.0:
+
+ ; GFX6-LABEL: name: store_private_s32_to_1_fi_offset_4095
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
+ ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ %0:vgpr(p5) = G_FRAME_INDEX %stack.0
+ %1:vgpr(s32) = G_CONSTANT i32 4095
+ %2:vgpr(p5) = G_GEP %0, %1
+ %3:vgpr(s32) = G_CONSTANT i32 0
+ G_STORE %3, %2 :: (store 1, align 1, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1_constant_4095
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+ bb.0:
+
+ ; GFX6-LABEL: name: store_private_s32_to_1_constant_4095
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9-LABEL: name: store_private_s32_to_1_constant_4095
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ %0:vgpr(p5) = G_CONSTANT i32 4095
+ %1:vgpr(s32) = G_CONSTANT i32 0
+ G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
+
+...
+
+---
+
+name: store_private_s32_to_1_constant_4096
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+stack:
+ - { id: 0, size: 4096, alignment: 4 }
+
+body: |
+ bb.0:
+
+ ; GFX6-LABEL: name: store_private_s32_to_1_constant_4096
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+ ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ ; GFX9-LABEL: name: store_private_s32_to_1_constant_4096
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+ ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
+ %0:vgpr(p5) = G_CONSTANT i32 4096
+ %1:vgpr(s32) = G_CONSTANT i32 0
+ G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
+
+...
More information about the llvm-commits mailing list