[llvm] f19226d - GlobalISel: Have load lowering handle some unaligned accesses
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 30 09:56:07 PDT 2021
Author: Matt Arsenault
Date: 2021-07-30T12:55:58-04:00
New Revision: f19226dda5aea6d677b0e52b309ef9c605178a51
URL: https://github.com/llvm/llvm-project/commit/f19226dda5aea6d677b0e52b309ef9c605178a51
DIFF: https://github.com/llvm/llvm-project/commit/f19226dda5aea6d677b0e52b309ef9c605178a51.diff
LOG: GlobalISel: Have load lowering handle some unaligned accesses
The code for splitting an unaligned access into 2 pieces is
essentially the same as for splitting a non-power-of-2 load for
scalars. It would be better to pick an optimal memory access size and
directly use it, but splitting in half is what the DAG does.
As-is this fixes handling of some unaligned sextload/zextloads for
AMDGPU. In the future this will help drop the ugly abuse of
narrowScalar to handle splitting unaligned accesses.
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c1e0d2549c42e..4dddb98b47d55 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2928,16 +2928,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
return Legalized;
}
- // This load needs splitting into power of 2 sized loads.
if (DstTy.isVector())
return UnableToLegalize;
- if (isPowerOf2_32(MemSizeInBits))
- return UnableToLegalize; // Don't know what we're being asked to do.
// Big endian lowering not implemented.
if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
+ // This load needs splitting into power of 2 sized loads.
+ //
// Our strategy here is to generate anyextending loads for the smaller
// types up to next power-2 result type, and then combine the two larger
// result values together, before truncating back down to the non-pow-2
@@ -2950,8 +2949,21 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// v1 = i24 trunc v5
// By doing this we generate the correct truncate which should get
// combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
- uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ SmallSplitSize = MemSizeInBits - LargeSplitSize;
+ } else {
+ // Assume we're being asked to decompose an unaligned load.
+ // TODO: If this requires multiple splits, handle them all at once.
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize;
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -2976,9 +2988,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (AnyExtTy == DstTy)
MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
- else {
+ else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
+ } else {
+ assert(DstTy.isPointer() && "expected pointer");
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+
+ // FIXME: We currently consider this to be illegal for non-integral address
+ // spaces, but we still need a way to reinterpret the bits.
+ MIRBuilder.buildIntToPtr(DstReg, Or);
}
LoadMI.eraseFromParent();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
index 7b9ede5651cea..ab4e61a918468 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
@@ -259,8 +259,14 @@ body: |
; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
; GFX6-LABEL: name: test_sextload_global_s32_from_2_align1
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
- ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX6: $vgpr0 = COPY [[OR]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
$vgpr0 = COPY %1
@@ -279,8 +285,14 @@ body: |
; GFX8: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
; GFX6-LABEL: name: test_sextload_global_s64_from_2_align1
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
- ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[SEXTLOAD]], [[C1]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s32)
; GFX6: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
index 5ce354362c6a3..c3d3d272b18fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
@@ -259,8 +259,14 @@ body: |
; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
; GFX6-LABEL: name: test_zextload_global_s32_from_2_align1
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
- ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX6: $vgpr0 = COPY [[OR]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
$vgpr0 = COPY %1
@@ -279,8 +285,14 @@ body: |
; GFX8: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
; GFX6-LABEL: name: test_zextload_global_s64_from_2_align1
; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), align 1, addrspace 1)
- ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[ZEXTLOAD]](s32)
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+ ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; GFX6: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX6: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p1) :: (load (s8) from unknown-address + 1, addrspace 1)
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
+ ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
+ ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32)
; GFX6: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 1)
More information about the llvm-commits
mailing list