[llvm] r373716 - AMDGPU/GlobalISel: Fix using wrong addrspace for aperture

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 4 01:35:38 PDT 2019


Author: arsenm
Date: Fri Oct  4 01:35:38 2019
New Revision: 373716

URL: http://llvm.org/viewvc/llvm-project?rev=373716&view=rev
Log:
AMDGPU/GlobalISel: Fix using wrong addrspace for aperture

This was always passing the destination flat address space, when it
should be picking between the two valid source options.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=373716&r1=373715&r2=373716&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Fri Oct  4 01:35:38 2019
@@ -1085,6 +1085,8 @@ Register AMDGPULegalizerInfo::getSegment
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const LLT S32 = LLT::scalar(32);
 
+  assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS);
+
   if (ST.hasApertureRegs()) {
     // FIXME: Use inline constants (src_{shared, private}_base) instead of
     // getreg.
@@ -1231,7 +1233,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSp
   auto FlatNull =
       B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
 
-  Register ApertureReg = getSegmentAperture(DestAS, MRI, B);
+  Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
   if (!ApertureReg.isValid())
     return false;
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir?rev=373716&r1=373715&r2=373716&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir Fri Oct  4 01:35:38 2019
@@ -259,9 +259,9 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
     ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY2]], [[C2]](s64)
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
     ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]]
     ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
     ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
@@ -271,7 +271,7 @@ body: |
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
     ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
     ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]]
@@ -465,16 +465,16 @@ body: |
     ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
+    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
     ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY2]], [[C2]](s64)
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
     ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
     ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
     ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
     ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
     ; VI: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
     ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY3]], [[C2]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4)
     ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
     ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
     ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
@@ -486,14 +486,14 @@ body: |
     ; GFX9: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>)
     ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1
     ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
-    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+    ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
     ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32)
     ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
     ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
     ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32)
     ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
-    ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
+    ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759
     ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C2]](s32)
     ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
     ; GFX9: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)




More information about the llvm-commits mailing list