[llvm] 4515c24 - AMDGPU/GlobalISel: Fix assertions on legalize queries with huge align
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 12 15:57:47 PST 2022
Author: Matt Arsenault
Date: 2022-01-12T18:21:44-05:00
New Revision: 4515c24bbc32196957b03c6a42259d460c9c4848
URL: https://github.com/llvm/llvm-project/commit/4515c24bbc32196957b03c6a42259d460c9c4848
DIFF: https://github.com/llvm/llvm-project/commit/4515c24bbc32196957b03c6a42259d460c9c4848.diff
LOG: AMDGPU/GlobalISel: Fix assertions on legalize queries with huge align
For some reason we pass around the alignment in bits as uint64_t. Two
places were truncating it to unsigned and losing bits in extreme
cases.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 51f070fa5e172..64012a67c4655 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -272,8 +272,8 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
const bool IsLoad = Query.Opcode != AMDGPU::G_STORE;
unsigned RegSize = Ty.getSizeInBits();
- unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
- unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
+ uint64_t MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ uint64_t AlignBits = Query.MMODescrs[0].AlignInBits;
unsigned AS = Query.Types[1].getAddressSpace();
// All of these need to be custom lowered to cast the pointer operand.
@@ -380,7 +380,7 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
/// access up to the alignment. Note this case when the memory access itself
/// changes, not the size of the result register.
static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
- unsigned AlignInBits, unsigned AddrSpace,
+ uint64_t AlignInBits, unsigned AddrSpace,
unsigned Opcode) {
unsigned SizeInBits = MemoryTy.getSizeInBits();
// We don't want to widen cases that are naturally legal.
@@ -1172,7 +1172,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (MemSize > MaxSize)
return std::make_pair(0, LLT::scalar(MaxSize));
- unsigned Align = Query.MMODescrs[0].AlignInBits;
+ uint64_t Align = Query.MMODescrs[0].AlignInBits;
return std::make_pair(0, LLT::scalar(Align));
})
.fewerElementsIf(
@@ -2547,7 +2547,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
const LLT MemTy = MMO->getMemoryType();
const Align MemAlign = MMO->getAlign();
const unsigned MemSize = MemTy.getSizeInBits();
- const unsigned AlignInBits = 8 * MemAlign.value();
+ const uint64_t AlignInBits = 8 * MemAlign.value();
// Widen non-power-of-2 loads to the alignment if needed
if (shouldWidenLoad(ST, MemTy, AlignInBits, AddrSpace, MI.getOpcode())) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 7b047ddd27e33..331a5d3521890 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -6575,3 +6575,27 @@ body: |
$vgpr0_vgpr1 = COPY %1
...
+# Make sure there's no crash on very high alignments
+---
+name: test_load_flat_s32_align536870912
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CI-LABEL: name: test_load_flat_s32_align536870912
+ ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+ ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; VI-LABEL: name: test_load_flat_s32_align536870912
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+ ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-LABEL: name: test_load_flat_s32_align536870912
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+ ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912)
+ $vgpr0 = COPY %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index acaf1dab33f8f..100180176064b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -15593,3 +15593,40 @@ body: |
%1:_(<8 x s4>) = G_LOAD %0 :: (load (<8 x s4>), align 4, addrspace 1)
$vgpr0 = COPY %1
...
+
+# Make sure there's no crash on very high alignments
+---
+name: test_load_global_s32_align536870912
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; SI-LABEL: name: test_load_global_s32_align536870912
+ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; CI-HSA-LABEL: name: test_load_global_s32_align536870912
+ ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; CI-MESA-LABEL: name: test_load_global_s32_align536870912
+ ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; VI-LABEL: name: test_load_global_s32_align536870912
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-HSA-LABEL: name: test_load_global_s32_align536870912
+ ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-MESA-LABEL: name: test_load_global_s32_align536870912
+ ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
More information about the llvm-commits
mailing list