[llvm] r343047 - [AMDGPU] Fix ds combine with subregs
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 25 16:33:18 PDT 2018
Author: rampitec
Date: Tue Sep 25 16:33:18 2018
New Revision: 343047
URL: http://llvm.org/viewvc/llvm-project?rev=343047&view=rev
Log:
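[AMDGPU] Fix ds combine with subregs
When merging DS reads/writes into read2/write2, the subregister index of the
address operand was dropped. It is now propagated to the base-offset add or,
when no base offset is needed, to the address operand of the merged instruction.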
Differential Revision: https://reviews.llvm.org/D52522
Modified:
llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp?rev=343047&r1=343046&r2=343047&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp Tue Sep 25 16:33:18 2018
@@ -514,6 +514,7 @@ MachineBasicBlock::iterator SILoadStore
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -525,15 +526,16 @@ MachineBasicBlock::iterator SILoadStore
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
.addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Read2 = BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
- .addReg(BaseReg, BaseRegFlags) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
(void)Read2;
@@ -601,6 +603,7 @@ MachineBasicBlock::iterator SILoadStoreO
DebugLoc DL = CI.I->getDebugLoc();
unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseSubReg = AddrReg->getSubReg();
unsigned BaseRegFlags = 0;
if (CI.BaseOff) {
unsigned ImmReg = MRI->createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -612,17 +615,18 @@ MachineBasicBlock::iterator SILoadStoreO
TII->getAddNoCarry(*MBB, CI.Paired, DL, BaseReg)
.addReg(ImmReg)
- .addReg(AddrReg->getReg());
+ .addReg(AddrReg->getReg(), 0, BaseSubReg);
+ BaseSubReg = 0;
}
MachineInstrBuilder Write2 = BuildMI(*MBB, CI.Paired, DL, Write2Desc)
- .addReg(BaseReg, BaseRegFlags) // addr
- .add(*Data0) // data0
- .add(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
+ .addReg(BaseReg, BaseRegFlags, BaseSubReg) // addr
+ .add(*Data0) // data0
+ .add(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .cloneMergedMemRefs({&*CI.I, &*CI.Paired});
moveInstsAfter(Write2, CI.InstsToMove);
Modified: llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir?rev=343047&r1=343046&r2=343047&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-load-store-vreg.mir Tue Sep 25 16:33:18 2018
@@ -6,7 +6,7 @@
# $vcc, which is used in _e32); this ensures that $vcc is not inadvertently
# clobbered.
-# GCN-LABEL: name: kernel
+# GCN-LABEL: name: ds_combine_base_offset{{$}}
# VI: V_ADD_I32_e64 %6, %0,
# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
@@ -21,7 +21,37 @@
--- |
@0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4
- define amdgpu_kernel void @kernel() {
+ define amdgpu_kernel void @ds_combine_base_offset() {
+ bb.0:
+ br label %bb2
+
+ bb1:
+ ret void
+
+ bb2:
+ %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
+ %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
+ %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
+ %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
+ br label %bb1
+ }
+
+ define amdgpu_kernel void @ds_combine_base_offset_subreg() {
+ bb.0:
+ br label %bb2
+
+ bb1:
+ ret void
+
+ bb2:
+ %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
+ %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
+ %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
+ %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
+ br label %bb1
+ }
+
+ define amdgpu_kernel void @ds_combine_subreg() {
bb.0:
br label %bb2
@@ -36,7 +66,7 @@
br label %bb1
}
---
-name: kernel
+name: ds_combine_base_offset
body: |
bb.0:
%0:vgpr_32 = IMPLICIT_DEF
@@ -57,4 +87,70 @@ body: |
$vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
S_BRANCH %bb.1
+...
+
+# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}
+
+# VI: V_ADD_I32_e64 %6, %0.sub0,
+# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
+# VI: V_ADD_I32_e64 %10, %3.sub0,
+# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,
+
+# GFX9: V_ADD_U32_e64 %6, %0.sub0,
+# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
+# GFX9: V_ADD_U32_e64 %9, %3.sub0,
+# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
+---
+name: ds_combine_base_offset_subreg
+body: |
+ bb.0:
+ %0:vreg_64 = IMPLICIT_DEF
+ S_BRANCH %bb.2
+
+ bb.1:
+ S_ENDPGM
+
+ bb.2:
+ %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
+ V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
+ DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
+ undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+ DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
+ %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
+ %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
+ $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.1
+...
+
+# GCN-LABEL: name: ds_combine_subreg{{$}}
+
+# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
+# VI: DS_READ2_B32 %3.sub0, 0, 8,
+
+# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
+# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
+---
+name: ds_combine_subreg
+body: |
+ bb.0:
+ %0:vreg_64 = IMPLICIT_DEF
+ S_BRANCH %bb.2
+
+ bb.1:
+ S_ENDPGM
+
+ bb.2:
+ %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
+ V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
+ DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
+ undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+ DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
+ %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
+ %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
+ $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ S_BRANCH %bb.1
...
More information about the llvm-commits mailing list