[llvm] 2605adb - [AMDGPU][GlobalISel] Select 8-byte LDS Ops with 4-byte alignment
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 10:43:07 PST 2020
Author: Austin Kerbow
Date: 2020-01-29T10:42:12-08:00
New Revision: 2605adb69c6f1f95c709c21560add8230e30e60b
URL: https://github.com/llvm/llvm-project/commit/2605adb69c6f1f95c709c21560add8230e30e60b
DIFF: https://github.com/llvm/llvm-project/commit/2605adb69c6f1f95c709c21560add8230e30e60b.diff
LOG: [AMDGPU][GlobalISel] Select 8-byte LDS Ops with 4-byte alignment
Reviewers: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73585
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index ce65717f2b29..21208aaf67ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -88,6 +88,10 @@ def gi_ds_1addr_1offset :
GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
GIComplexPatternEquiv<DS1Addr1Offset>;
+// GlobalISel equivalent of the DS64Bit4ByteAligned complex pattern; operands
+// are matched by AMDGPUInstructionSelector::selectDS64Bit4ByteAligned.
+def gi_ds_64bit_4byte_aligned :
+ GIComplexOperandMatcher<s64, "selectDS64Bit4ByteAligned">,
+ GIComplexPatternEquiv<DS64Bit4ByteAligned>;
+
def gi_mubuf_addr64 :
GIComplexOperandMatcher<s64, "selectMUBUFAddr64">,
GIComplexPatternEquiv<MUBUFAddr64>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f50817f669f9..4596889d7429 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2399,6 +2399,50 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
}};
}
+/// Match the address operand of an 8-byte DS access that is only 4-byte
+/// aligned, for the DS64Bit4ByteAligned patterns (DS_READ2_B32 /
+/// DS_WRITE2_B32).
+///
+/// On success three renderers are returned, in order: the base pointer,
+/// offset0 and offset1. Both offsets are in dword (4-byte) units and
+/// offset1 == offset0 + 1.
+///
+/// A constant byte offset added to the base is folded into the offset fields
+/// only when the folding is exact and encodable: the offset must be a
+/// positive multiple of 4 and offset1 must be accepted by isDSOffsetLegal for
+/// the 8-bit field. In every other case the address is used unmodified with
+/// offsets 0 and 1.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
+  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
+  if (!RootDef) {
+    // No defining instruction to inspect; use the address as-is.
+    return {{
+        [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(1); }
+      }};
+  }
+
+  int64_t ConstAddr = 0;
+  Register PtrBase;
+  int64_t Offset;
+
+  std::tie(PtrBase, Offset) =
+    getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
+
+  if (Offset) {
+    // The offset fields count dwords, so a byte offset that is not a multiple
+    // of 4 cannot be represented: dividing would silently truncate it and the
+    // access would hit the wrong address. A negative offset would likewise
+    // yield a negative offset0 even when offset1 happens to be in range.
+    // Only fold offsets that are positive and dword-aligned.
+    if (Offset > 0 && Offset % 4 == 0) {
+      int64_t DWordOffset0 = Offset / 4;
+      int64_t DWordOffset1 = DWordOffset0 + 1;
+      // offset1 is the larger of the two, so checking it covers offset0.
+      if (isDSOffsetLegal(PtrBase, DWordOffset1, 8)) {
+        // (add n0, c0)
+        return {{
+            [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); },
+            [=](MachineInstrBuilder &MIB) { MIB.addImm(DWordOffset0); },
+            [=](MachineInstrBuilder &MIB) { MIB.addImm(DWordOffset1); }
+          }};
+      }
+    }
+  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
+    // TODO
+
+  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
+    // TODO
+
+  }
+
+  // Nothing could be folded: address the two dwords relative to Root itself.
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(1); }
+    }};
+}
+
/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
/// the base value with the constant offset. There may be intervening copies
/// between \p Root and the identified constant. Returns \p Root, 0 if this does
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index f4d9defd33f8..d7bf1885dd51 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -179,6 +179,8 @@ class AMDGPUInstructionSelector : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
+ /// Complex operand matcher for the DS64Bit4ByteAligned patterns. The
+ /// returned renderers add, in order, the base pointer and two immediate
+ /// offsets (offset0, offset1) expressed in dword units.
+ InstructionSelector::ComplexRendererFns
+ selectDS64Bit4ByteAligned(MachineOperand &Root) const;
std::pair<Register, int64_t>
getPtrBaseWithConstantOffset(Register Root,
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index fe7faca8b157..ab069c681c97 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -737,31 +737,35 @@ def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
def : DSWritePat <DS_WRITE_B8_D16_HI, i32, truncstorei8_hi16_local>;
}
-
-class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, PatFrag frag> : GCNPat <
- (v2i32 (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
+class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
+ (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))),
(inst $ptr, $offset0, $offset1, (i1 0))
>;
-class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, PatFrag frag> : GCNPat<
- (frag v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
- (inst $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
- (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
+class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat<
+ (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)),
+ (inst $ptr, (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)), $offset0, $offset1,
(i1 0))
>;
-// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
-// related to bounds checking.
-let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
-def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, load_local_m0>;
-def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, store_local_m0>;
-}
+multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
+ let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
+ def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32, vt, load_local_m0>;
+ def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
+ }
-let OtherPredicates = [NotLDSRequiresM0Init] in {
-def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, load_local>;
-def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, store_local>;
+ let OtherPredicates = [NotLDSRequiresM0Init] in {
+ def : DS64Bit4ByteAlignedReadPat<DS_READ2_B32_gfx9, vt, load_local>;
+ def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
+ }
}
+// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
+// related to bounds checking.
+foreach vt = VReg_64.RegTypes in {
+defm : DS64Bit4ByteAlignedPat_mc<vt>;
+}
let AddedComplexity = 100 in {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
index a118a873de59..08fdd0f30a16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -28,12 +28,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
- ; GFX7-DS128-LABEL: name: load_local_s32_from_4
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -68,12 +62,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U16_]]
- ; GFX7-DS128-LABEL: name: load_local_s32_from_2
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U16_]]
; GFX9-LABEL: name: load_local_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -112,12 +100,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
- ; GFX7-DS128-LABEL: name: load_local_s32_from_1
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -152,12 +134,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ; GFX7-DS128-LABEL: name: load_local_v2s32
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_v2s32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -188,21 +164,15 @@ body: |
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
; GFX7-LABEL: name: load_local_v2s32_align4
; GFX7: liveins: $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
- ; GFX7-DS128-LABEL: name: load_local_v2s32_align4
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
; GFX9-LABEL: name: load_local_v2s32_align4
; GFX9: liveins: $vgpr0
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
$vgpr0_vgpr1 = COPY %1
@@ -232,12 +202,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ; GFX7-DS128-LABEL: name: load_local_s64
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_s64
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -268,21 +232,15 @@ body: |
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
; GFX7-LABEL: name: load_local_s64_align4
; GFX7: liveins: $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
- ; GFX7-DS128-LABEL: name: load_local_s64_align4
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
; GFX9-LABEL: name: load_local_s64_align4
; GFX9: liveins: $vgpr0
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
$vgpr0_vgpr1 = COPY %1
@@ -312,12 +270,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
- ; GFX7-DS128-LABEL: name: load_local_p3_from_4
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -352,12 +304,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
- ; GFX7-DS128-LABEL: name: load_local_p5_from_4
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -392,12 +338,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ; GFX7-DS128-LABEL: name: load_local_p1_align8
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_p1_align8
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -428,21 +368,15 @@ body: |
; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
; GFX7-LABEL: name: load_local_p1_align4
; GFX7: liveins: $vgpr0
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
- ; GFX7-DS128-LABEL: name: load_local_p1_align4
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
; GFX9-LABEL: name: load_local_p1_align4
; GFX9: liveins: $vgpr0
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
$vgpr0_vgpr1 = COPY %1
@@ -472,12 +406,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
- ; GFX7-DS128-LABEL: name: load_local_p999_from_8
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
; GFX9-LABEL: name: load_local_p999_from_8
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@@ -512,12 +440,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
- ; GFX7-DS128-LABEL: name: load_local_v2p3
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
; GFX9-LABEL: name: load_local_v2p3
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
@@ -552,12 +474,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
- ; GFX7-DS128-LABEL: name: load_local_v2s16
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_B32_]]
; GFX9-LABEL: name: load_local_v2s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -592,12 +508,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
- ; GFX7-DS128-LABEL: name: load_local_v4s16
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
- ; GFX7-DS128: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
; GFX9-LABEL: name: load_local_v4s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -659,12 +569,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
- ; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65535
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -750,14 +654,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
- ; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_65536
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
- ; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -800,14 +696,6 @@ body: |
; GFX7: $m0 = S_MOV_B32 -1
; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
- ; GFX7-DS128-LABEL: name: load_local_s32_from_1_gep_m1
- ; GFX7-DS128: liveins: $vgpr0
- ; GFX7-DS128: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX7-DS128: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
- ; GFX7-DS128: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
- ; GFX7-DS128: $m0 = S_MOV_B32 -1
- ; GFX7-DS128: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
- ; GFX7-DS128: $vgpr0 = COPY [[DS_READ_U8_]]
; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -822,3 +710,83 @@ body: |
$vgpr0 = COPY %3
...
+
+---
+
+name: load_local_s64_align4_from_1_gep_1016
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1016
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1016
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 [[COPY]], 254, 255, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1016
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[COPY]], 254, 255, 0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 1016
+ %2:vgpr(p3) = G_PTR_ADD %0, %1
+ %3:vgpr(s64) = G_LOAD %2 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %3
+
+...
+
+---
+
+name: load_local_s64_align4_from_1_gep_1020
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX6-LABEL: name: load_local_s64_align4_from_1_gep_1020
+ ; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7-LABEL: name: load_local_s64_align4_from_1_gep_1020
+ ; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+ ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ2_B32_:%[0-9]+]]:vreg_64 = DS_READ2_B32 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_]]
+ ; GFX9-LABEL: name: load_local_s64_align4_from_1_gep_1020
+ ; GFX9: liveins: $vgpr0_vgpr1
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: [[DS_READ2_B32_gfx9_:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ2_B32_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 1020
+ %2:vgpr(p3) = G_PTR_ADD %0, %1
+ %3:vgpr(s64) = G_LOAD %2 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %3
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
index 449a3e5f725f..60cc05c7da5c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-local.mir
@@ -284,15 +284,19 @@ body: |
; GFX6: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
; GFX7-LABEL: name: store_local_s64_align4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3)
; GFX9-LABEL: name: store_local_s64_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
- ; GFX9: G_STORE [[COPY]](s64), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3)
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
@@ -322,15 +326,19 @@ body: |
; GFX6: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
; GFX7-LABEL: name: store_local_p1_align4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3)
; GFX9-LABEL: name: store_local_p1_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
- ; GFX9: G_STORE [[COPY]](p1), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
@@ -360,15 +368,19 @@ body: |
; GFX6: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
; GFX7-LABEL: name: store_local_v2s32_align4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3)
; GFX9-LABEL: name: store_local_v2s32_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
- ; GFX9: G_STORE [[COPY]](<2 x s32>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3)
%0:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
@@ -398,15 +410,19 @@ body: |
; GFX6: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
; GFX7-LABEL: name: store_local_v4s16_align4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX7: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: $m0 = S_MOV_B32 -1
- ; GFX7: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3)
; GFX9-LABEL: name: store_local_v4s16_align4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
- ; GFX9: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
- ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
- ; GFX9: G_STORE [[COPY]](<4 x s16>), [[COPY1]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3)
%0:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %0, %1 :: (store 8, align 4, addrspace 3)
@@ -564,3 +580,99 @@ body: |
G_STORE %0, %1 :: (store 8, align 8, addrspace 3)
...
+
+---
+
+name: store_local_s64_align4_from_1_gep_1016
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1016
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1016
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32)
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1016
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: DS_WRITE2_B32 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3)
+ ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1016
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: DS_WRITE2_B32_gfx9 [[COPY1]], [[COPY3]], [[COPY2]], 254, 255, 0, implicit $exec :: (store 8, align 4, addrspace 3)
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ %2:vgpr(s32) = G_CONSTANT i32 1016
+ %3:vgpr(p3) = G_PTR_ADD %1, %2
+ G_STORE %0, %3 :: (store 8, align 4, addrspace 3)
+
+...
+
+---
+
+name: store_local_s64_align4_from_1_gep_1020
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2
+
+ ; GFX6-LABEL: name: store_local_s64_align4_from_1_gep_1020
+ ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY $vgpr2
+ ; GFX6: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1020
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:vgpr(p3) = G_PTR_ADD [[COPY1]], [[C]](s32)
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: G_STORE [[COPY]](s64), [[PTR_ADD]](p3) :: (store 8, align 4, addrspace 3)
+ ; GFX7-LABEL: name: store_local_s64_align4_from_1_gep_1020
+ ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+ ; GFX7: %3:vgpr_32, dead %6:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX7: DS_WRITE2_B32 %3, [[COPY3]], [[COPY2]], 0, 1, 0, implicit $m0, implicit $exec :: (store 8, align 4, addrspace 3)
+ ; GFX9-LABEL: name: store_local_s64_align4_from_1_gep_1020
+ ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1020, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: DS_WRITE2_B32_gfx9 [[V_ADD_U32_e64_]], [[COPY3]], [[COPY2]], 0, 1, 0, implicit $exec :: (store 8, align 4, addrspace 3)
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(p3) = COPY $vgpr2
+ %2:vgpr(s32) = G_CONSTANT i32 1020
+ %3:vgpr(p3) = G_PTR_ADD %1, %2
+ G_STORE %0, %3 :: (store 8, align 4, addrspace 3)
+
+...
More information about the llvm-commits
mailing list