[PATCH] D102731: AMDGPU/GlobalISel: Lower constant-32-bit zextload/sextload consistently
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 18 15:53:37 PDT 2021
arsenm created this revision.
arsenm added reviewers: foad, cdevadas, mbrkusanin, Petar.Avramovic.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, rovka, yaxunl, nhaehnle, jvesely, kzhuravl.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
We were accidentally leaning on code in lowerLoad which expands
extending loads which should be removed.
https://reviews.llvm.org/D102731
Files:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
@@ -103,11 +103,8 @@
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
- ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 1, addrspace 6)
- ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
- ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
- ; CI: $vgpr0 = COPY [[AND]](s32)
+ ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load 1, addrspace 6)
+ ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_ZEXTLOAD %0 :: (load 1, align 1, addrspace 6)
$vgpr0 = COPY %1
@@ -123,11 +120,8 @@
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
- ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 2, addrspace 6)
- ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
- ; CI: $vgpr0 = COPY [[AND]](s32)
+ ; CI: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load 2, addrspace 6)
+ ; CI: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_ZEXTLOAD %0 :: (load 2, align 2, addrspace 6)
$vgpr0 = COPY %1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
@@ -103,10 +103,8 @@
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
- ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 1, addrspace 6)
- ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
- ; CI: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load 1, addrspace 6)
+ ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_SEXTLOAD %0 :: (load 1, align 1, addrspace 6)
$vgpr0 = COPY %1
@@ -122,10 +120,8 @@
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
- ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 2, addrspace 6)
- ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; CI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
- ; CI: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ ; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load 2, addrspace 6)
+ ; CI: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 2, addrspace 6)
$vgpr0 = COPY %1
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1273,6 +1273,13 @@
{{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
}
+ // Constant 32-bit is handled by addrspacecasting the 32-bit pointer to
+ // 64-bits.
+ //
+ // TODO: Should generalize bitcast action into coerce, which will also cover
+ // inserting addrspacecasts.
+ ExtLoads.customIf(typeIs(1, Constant32Ptr));
+
ExtLoads.clampScalar(0, S32, S32)
.widenScalarToNextPow2(0)
.unsupportedIfMemSizeNotPow2()
@@ -1694,6 +1701,8 @@
case TargetOpcode::G_GLOBAL_VALUE:
return legalizeGlobalValue(MI, MRI, B);
case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_SEXTLOAD:
+ case TargetOpcode::G_ZEXTLOAD:
return legalizeLoad(Helper, MI);
case TargetOpcode::G_FMAD:
return legalizeFMad(MI, MRI, B);
@@ -2404,6 +2413,9 @@
return true;
}
+ if (MI.getOpcode() != AMDGPU::G_LOAD)
+ return false;
+
Register ValReg = MI.getOperand(0).getReg();
LLT ValTy = MRI.getType(ValReg);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102731.346293.patch
Type: text/x-patch
Size: 4629 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210518/4bef1a2f/attachment.bin>
More information about the llvm-commits
mailing list