[llvm] 4515c24 - AMDGPU/GlobalISel: Fix assertions on legalize queries with huge align
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 12 15:57:47 PST 2022
Author: Matt Arsenault
Date: 2022-01-12T18:21:44-05:00
New Revision: 4515c24bbc32196957b03c6a42259d460c9c4848
URL: https://github.com/llvm/llvm-project/commit/4515c24bbc32196957b03c6a42259d460c9c4848
DIFF: https://github.com/llvm/llvm-project/commit/4515c24bbc32196957b03c6a42259d460c9c4848.diff
LOG: AMDGPU/GlobalISel: Fix assertions on legalize queries with huge align
For some reason we pass around the alignment in bits as uint64_t. Two
places were truncating it to unsigned and losing bits in extreme
cases.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 51f070fa5e172..64012a67c4655 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -272,8 +272,8 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
const bool IsLoad = Query.Opcode != AMDGPU::G_STORE;
unsigned RegSize = Ty.getSizeInBits();
- unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
- unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
+ uint64_t MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+ uint64_t AlignBits = Query.MMODescrs[0].AlignInBits;
unsigned AS = Query.Types[1].getAddressSpace();
// All of these need to be custom lowered to cast the pointer operand.
@@ -380,7 +380,7 @@ static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
/// access up to the alignment. Note this case when the memory access itself
/// changes, not the size of the result register.
static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
- unsigned AlignInBits, unsigned AddrSpace,
+ uint64_t AlignInBits, unsigned AddrSpace,
unsigned Opcode) {
unsigned SizeInBits = MemoryTy.getSizeInBits();
// We don't want to widen cases that are naturally legal.
@@ -1172,7 +1172,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (MemSize > MaxSize)
return std::make_pair(0, LLT::scalar(MaxSize));
- unsigned Align = Query.MMODescrs[0].AlignInBits;
+ uint64_t Align = Query.MMODescrs[0].AlignInBits;
return std::make_pair(0, LLT::scalar(Align));
})
.fewerElementsIf(
@@ -2547,7 +2547,7 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
const LLT MemTy = MMO->getMemoryType();
const Align MemAlign = MMO->getAlign();
const unsigned MemSize = MemTy.getSizeInBits();
- const unsigned AlignInBits = 8 * MemAlign.value();
+ const uint64_t AlignInBits = 8 * MemAlign.value();
// Widen non-power-of-2 loads to the alignment if needed
if (shouldWidenLoad(ST, MemTy, AlignInBits, AddrSpace, MI.getOpcode())) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 7b047ddd27e33..331a5d3521890 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -6575,3 +6575,27 @@ body: |
$vgpr0_vgpr1 = COPY %1
...
+# Make sure there's no crash on very high alignments
+---
+name: test_load_flat_s32_align536870912
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CI-LABEL: name: test_load_flat_s32_align536870912
+ ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+ ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; VI-LABEL: name: test_load_flat_s32_align536870912
+ ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+ ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-LABEL: name: test_load_flat_s32_align536870912
+ ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 536870912)
+ ; GFX9-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ %0:_(p0) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912)
+ $vgpr0 = COPY %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index acaf1dab33f8f..100180176064b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -15593,3 +15593,40 @@ body: |
%1:_(<8 x s4>) = G_LOAD %0 :: (load (<8 x s4>), align 4, addrspace 1)
$vgpr0 = COPY %1
...
+
+# Make sure there's no crash on very high alignments
+---
+name: test_load_global_s32_align536870912
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; SI-LABEL: name: test_load_global_s32_align536870912
+ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; SI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; CI-HSA-LABEL: name: test_load_global_s32_align536870912
+ ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; CI-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; CI-MESA-LABEL: name: test_load_global_s32_align536870912
+ ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; CI-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; VI-LABEL: name: test_load_global_s32_align536870912
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; VI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-HSA-LABEL: name: test_load_global_s32_align536870912
+ ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; GFX9-HSA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ ; GFX9-MESA-LABEL: name: test_load_global_s32_align536870912
+ ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 536870912, addrspace 1)
+ ; GFX9-MESA-NEXT: $vgpr0 = COPY [[LOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_LOAD %0 :: (load (s16), align 536870912, addrspace 1)
+ $vgpr0 = COPY %1
+
+...
More information about the llvm-commits
mailing list