[llvm] 67d6132 - GlobalISel: Preserve memory types for implicit sret load/stores
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 19 08:52:49 PDT 2021
Author: Matt Arsenault
Date: 2021-07-19T11:52:42-04:00
New Revision: 67d6132463541d5e4cf7483cb3c7b5d95b46ee6d
URL: https://github.com/llvm/llvm-project/commit/67d6132463541d5e4cf7483cb3c7b5d95b46ee6d
DIFF: https://github.com/llvm/llvm-project/commit/67d6132463541d5e4cf7483cb3c7b5d95b46ee6d.diff
LOG: GlobalISel: Preserve memory types for implicit sret load/stores
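The implicit sret lowering previously built its MachineMemOperands from the
byte size of the virtual register, so the MMO only recorded an opaque scalar
size (e.g. s1056 for a <33 x s32> store). This change passes the register's
LLT instead, so the MMO carries the real memory type, which later passes that
inspect MMOs can make use of.

A minimal sketch of the two MachineFunction::getMachineMemOperand overloads
involved (simplified; the in-tree signatures take further defaulted
parameters such as AA metadata):

    // Before: only the access size in bytes survives into the MMO.
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad,
        MRI.getType(VRegs[I]).getSizeInBytes(), // uint64_t size
        commonAlignment(BaseAlign, Offsets[I]));

    // After: the full LLT (scalar, vector, or pointer) is recorded.
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo, MachineMemOperand::MOLoad,
        MRI.getType(VRegs[I]),                  // LLT memory type
        commonAlignment(BaseAlign, Offsets[I]));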
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 83dc9bf4244b..8dd544f28827 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -791,7 +791,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
Register Addr;
MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- MRI.getType(VRegs[I]).getSizeInBytes(),
+ MRI.getType(VRegs[I]),
commonAlignment(BaseAlign, Offsets[I]));
MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
}
@@ -822,7 +822,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
Register Addr;
MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- MRI.getType(VRegs[I]).getSizeInBytes(),
+ MRI.getType(VRegs[I]),
commonAlignment(BaseAlign, Offsets[I]));
MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
index 0d285c4f4666..3dfd3ab5a15a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -1143,7 +1143,7 @@ define <33 x i32> @v33i32_func_void() #0 {
; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1)
- ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5)
+ ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
%ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
@@ -1167,7 +1167,7 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64)
; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1)
; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1)
- ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5)
+ ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5)
; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
; CHECK: S_SETPC_B64_return [[COPY6]]
%gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx
@@ -1187,7 +1187,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1)
- ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5)
+ ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5)
@@ -1213,7 +1213,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (s1024), addrspace 5)
+ ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
%ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
@@ -1490,16 +1490,16 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 {
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1)
- ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5)
+ ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
- ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (s32), align 128, addrspace 5)
+ ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5)
; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
- ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (s64), addrspace 5)
+ ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5)
; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
- ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (s128), addrspace 5)
+ ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5)
; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK: S_SETPC_B64_return [[COPY2]]
%val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 139f0c4c321f..0b228cceebd4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -2684,7 +2684,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN: $vgpr31 = COPY [[OR1]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1024) from %stack.0, addrspace 5)
+ ; GCN: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
@@ -2752,7 +2752,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
- ; GCN: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (s1024) from %stack.0, addrspace 5)
+ ; GCN: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
; GCN: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
; GCN: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN: S_ENDPGM 0
@@ -2813,7 +2813,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN: $vgpr31 = COPY [[OR1]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5)
+ ; GCN: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN: S_ENDPGM 0
%val = call <33 x i32> @external_v33i32_func_void()
@@ -2879,7 +2879,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
; GCN: $vgpr31 = COPY [[OR1]](s32)
; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; GCN: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5)
+ ; GCN: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
; GCN: S_ENDPGM 0
%val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)