[llvm] 67d6132 - GlobalISel: Preserve memory types for implicit sret load/stores

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 19 08:52:49 PDT 2021


Author: Matt Arsenault
Date: 2021-07-19T11:52:42-04:00
New Revision: 67d6132463541d5e4cf7483cb3c7b5d95b46ee6d

URL: https://github.com/llvm/llvm-project/commit/67d6132463541d5e4cf7483cb3c7b5d95b46ee6d
DIFF: https://github.com/llvm/llvm-project/commit/67d6132463541d5e4cf7483cb3c7b5d95b46ee6d.diff

LOG: GlobalISel: Preserve memory types for implicit sret load/stores
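
For context, the functional change in CallLowering.cpp swaps the
size-in-bytes getMachineMemOperand overload for the LLT-taking one, so the
machine memory operand records the memory type rather than just a byte
count. A minimal before/after sketch of the store path (the load path in
insertSRetLoads is analogous; all names are taken from the patch itself):

    // Before: the MMO only carried a byte size, so e.g. a <33 x s32>
    // store was printed as an opaque (store (s1056)) in the MIR.
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                        MRI.getType(VRegs[I]).getSizeInBytes(),
                                        commonAlignment(BaseAlign, Offsets[I]));

    // After: passing the LLT keeps the vector/pointer memory type on the
    // MMO, so the same store prints as (store (<33 x s32>)).
    auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                        MRI.getType(VRegs[I]),
                                        commonAlignment(BaseAlign, Offsets[I]));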

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 83dc9bf4244b..8dd544f28827 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -791,7 +791,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
     Register Addr;
     MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
     auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                                        MRI.getType(VRegs[I]).getSizeInBytes(),
+                                        MRI.getType(VRegs[I]),
                                         commonAlignment(BaseAlign, Offsets[I]));
     MIRBuilder.buildLoad(VRegs[I], Addr, *MMO);
   }
@@ -822,7 +822,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
     Register Addr;
     MIRBuilder.materializePtrAdd(Addr, DemoteReg, OffsetLLTy, Offsets[I]);
     auto *MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                        MRI.getType(VRegs[I]).getSizeInBytes(),
+                                        MRI.getType(VRegs[I]),
                                         commonAlignment(BaseAlign, Offsets[I]));
     MIRBuilder.buildStore(VRegs[I], Addr, *MMO);
   }

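The test updates below show the effect in the printed MIR: the implicit
sret loads and stores now carry the memory type, e.g. (<33 x s32>), (p1),
or (<2 x p1>), instead of a raw bit size such as (s1056) or (s1024).
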
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
index 0d285c4f4666..3dfd3ab5a15a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -1143,7 +1143,7 @@ define <33 x i32> @v33i32_func_void() #0 {
   ; CHECK:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
   ; CHECK:   [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5)
   ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
   ; CHECK:   S_SETPC_B64_return [[COPY2]]
   %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
@@ -1167,7 +1167,7 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64)
   ; CHECK:   [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1)
   ; CHECK:   [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (s1056), align 256, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5)
   ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
   ; CHECK:   S_SETPC_B64_return [[COPY6]]
   %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx
@@ -1187,7 +1187,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
   ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
   ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5)
+  ; CHECK:   G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
   ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
   ; CHECK:   G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5)
@@ -1213,7 +1213,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
   ; CHECK:   G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5)
   ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
-  ; CHECK:   G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (s1024), addrspace 5)
+  ; CHECK:   G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5)
   ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
   ; CHECK:   S_SETPC_B64_return [[COPY2]]
   %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
@@ -1490,16 +1490,16 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 {
   ; CHECK:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
   ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
   ; CHECK:   [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (s1024), addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5)
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32)
-  ; CHECK:   G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (s32), align 128, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5)
   ; CHECK:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
   ; CHECK:   [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32)
-  ; CHECK:   G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (s64), addrspace 5)
+  ; CHECK:   G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5)
   ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
   ; CHECK:   [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32)
-  ; CHECK:   G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (s128), addrspace 5)
+  ; CHECK:   G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5)
   ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
   ; CHECK:   S_SETPC_B64_return [[COPY2]]
   %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 139f0c4c321f..0b228cceebd4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -2684,7 +2684,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
   ; GCN:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1024) from %stack.0, addrspace 5)
+  ; GCN:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
   ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
   ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
@@ -2752,7 +2752,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
   ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
   ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
   ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (s1024) from %stack.0, addrspace 5)
+  ; GCN:   [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
   ; GCN:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
   ; GCN:   G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN:   S_ENDPGM 0
@@ -2813,7 +2813,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
   ; GCN:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5)
+  ; GCN:   [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
   ; GCN:   G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN:   S_ENDPGM 0
   %val = call <33 x i32> @external_v33i32_func_void()
@@ -2879,7 +2879,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
   ; GCN:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s1056) from %stack.0, align 256, addrspace 5)
+  ; GCN:   [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
   ; GCN:   G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
   ; GCN:   S_ENDPGM 0
   %val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx)
