[llvm] a601b30 - GlobalISel: Lower non-byte loads and stores
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 30 14:18:07 PDT 2021
Author: Matt Arsenault
Date: 2021-06-30T17:05:50-04:00
New Revision: a601b308d91eabf008cf13a041dc0238550e3c0c
URL: https://github.com/llvm/llvm-project/commit/a601b308d91eabf008cf13a041dc0238550e3c0c
DIFF: https://github.com/llvm/llvm-project/commit/a601b308d91eabf008cf13a041dc0238550e3c0c.diff
LOG: GlobalISel: Lower non-byte loads and stores
Previously we didn't preserve the memory type and had to blindly
interpret a number of bytes. Now that non-byte memory accesses are
representable, we can handle these correctly.
Ported from the DAG version (minus some weird special-case i1 legality
checking which I don't fully understand, and which we don't have a way
to query for).
For now, this is NFC and the test changes are placeholders. Since the
legality queries are still relying on byte-flattened memory sizes, the
legalizer can't actually see these non-byte accesses. This keeps this
change self contained without merging it with the larger patch to
switch to LLT memory queries.
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1816d0d8de5f..60c209729bda 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2762,7 +2762,51 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
Register PtrReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
- auto &MMO = **MI.memoperands_begin();
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ LLT MemTy = MMO.getMemoryType();
+ MachineFunction &MF = MIRBuilder.getMF();
+ if (MemTy.isVector())
+ return UnableToLegalize;
+
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
+
+ if (MemSizeInBits != MemStoreSizeInBits) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
+
+ Register LoadReg = DstReg;
+ LLT LoadTy = DstTy;
+
+ // If this wasn't already an extending load, we need to widen the result
+ // register to avoid creating a load with a narrower result than the source.
+ if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
+ LoadTy = WideMemTy;
+ LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
+ }
+
+ if (MI.getOpcode() == TargetOpcode::G_SEXTLOAD) {
+ auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+ MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
+ } else if (MI.getOpcode() == TargetOpcode::G_ZEXTLOAD ||
+ WideMemTy == DstTy) {
+ auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from Wide thus automatically gives zext from MemVT.
+ MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
+ } else {
+ MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
+ }
+
+ if (DstTy != LoadTy)
+ MIRBuilder.buildTrunc(DstReg, LoadReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
if (DstTy.getSizeInBits() != MMO.getSizeInBits())
return UnableToLegalize;
@@ -2831,20 +2875,46 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
Register SrcReg = MI.getOperand(0).getReg();
Register PtrReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
+ MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand &MMO = **MI.memoperands_begin();
- if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
- return UnableToLegalize;
+ LLT MemTy = MMO.getMemoryType();
+
if (SrcTy.isVector())
return UnableToLegalize;
- if (isPowerOf2_32(SrcTy.getSizeInBits()))
+
+ unsigned StoreWidth = MemTy.getSizeInBits();
+ unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
+
+ if (StoreWidth != StoreSizeInBits) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ LLT WideTy = LLT::scalar(StoreSizeInBits);
+
+ if (StoreSizeInBits > SrcTy.getSizeInBits()) {
+ // Avoid creating a store with a narrower source than result.
+ SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
+ SrcTy = WideTy;
+ }
+
+ auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
+
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
+ MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (isPowerOf2_32(MemTy.getSizeInBits()))
return UnableToLegalize; // Don't know what we're being asked to do.
// Extend to the next pow-2.
- const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+ const LLT ExtendTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
// Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+ uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
@@ -2857,7 +2927,6 @@ LegalizerHelper::lowerStore(MachineInstr &MI) {
auto SmallPtr =
MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
- MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
MachineMemOperand *SmallMMO =
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
index b5017eef2000..e09b59a1a389 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
@@ -6,6 +6,7 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_SEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_sextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i16_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s8>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s8)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_SEXTLOAD %0:_(p1) :: (load (<2 x s16>), addrspace 1) (in function: test_sextload_global_v2i32_from_v2s16)
@@ -14,6 +15,100 @@
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s128) = G_SEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_sextload_global_s128_8)
# ERR-NOT: remark
+---
+name: test_sextload_global_i32_i1
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_sextload_global_i32_i1
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_sextload_global_i32_i1
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_sextload_global_i32_i7
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_sextload_global_i32_i7
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_sextload_global_i32_i7
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1)
+ $vgpr0 = COPY %1
+...
+---
+name: test_sextload_global_i32_i24
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_sextload_global_i32_i24
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_sextload_global_i32_i24
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load (s24), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_sextload_global_i32_i30
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_sextload_global_i32_i30
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_sextload_global_i32_i30
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_sextload_global_i32_i31
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_sextload_global_i32_i31
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_sextload_global_i32_i31
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
---
name: test_sextload_global_i32_i8
body: |
@@ -32,6 +127,7 @@ body: |
%1:_(s32) = G_SEXTLOAD %0 :: (load (s8), addrspace 1)
$vgpr0 = COPY %1
...
+
---
name: test_sextload_global_i32_i16
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
index c877762540da..b598e05fb9bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir
@@ -6,6 +6,7 @@
# FIXME: Run with and without unaligned access turned on
# ERR-NOT: remark
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s32) = G_ZEXTLOAD %0:_(p1) :: (load (s24), align 4, addrspace 1) (in function: test_zextload_global_i32_i24)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s16>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i16_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s16), addrspace 1) (in function: test_zextload_global_v2i32_from_2)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(<2 x s32>) = G_ZEXTLOAD %0:_(p1) :: (load (s32), addrspace 1) (in function: test_zextload_global_v2i32_from_4)
@@ -14,6 +15,101 @@
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %1:_(s128) = G_ZEXTLOAD %0:_(p1) :: (load (s64), addrspace 1) (in function: test_zextload_global_s128_8)
# ERR-NOT: remark
+---
+name: test_zextload_global_i32_i1
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_zextload_global_i32_i1
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_zextload_global_i32_i1
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_zextload_global_i32_i7
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_zextload_global_i32_i7
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_zextload_global_i32_i7
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_zextload_global_i32_i24
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_zextload_global_i32_i24
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_zextload_global_i32_i24
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s24), align 4, addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_ZEXTLOAD %0 :: (load (s24), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_zextload_global_i32_i30
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_zextload_global_i32_i30
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_zextload_global_i32_i30
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
+---
+name: test_zextload_global_i32_i31
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GFX8-LABEL: name: test_zextload_global_i32_i31
+ ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
+ ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ ; GFX6-LABEL: name: test_zextload_global_i32_i31
+ ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
+ ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1)
+ $vgpr0 = COPY %1
+...
+
---
name: test_zextload_global_i32_i8
body: |
More information about the llvm-commits
mailing list