[PATCH] D105815: GlobalISel: Use extension instead of merge with undef in common case
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 12 07:40:05 PDT 2021
arsenm created this revision.
arsenm added reviewers: aemerson, paquette, dsanders, aditya_nandakumar, bogner.
Herald added subscribers: kerbowa, hiraditya, tpr, rovka, nhaehnle, jvesely.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
This fixes not respecting signext/zeroext in these cases. In the
anyext case, this avoids a larger merge with undef and should be a
better canonical form.
This should also handle this if a merge is needed, but I'm not aware
of a case where that can happen. In a future change this will also
allow AMDGPU to drop some custom code without introducing regressions.
https://reviews.llvm.org/D105815
Files:
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1121,9 +1121,8 @@
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr1 = COPY [[UV1]](s32)
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
@@ -1181,9 +1180,8 @@
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr1 = COPY [[UV1]](s32)
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
@@ -1241,9 +1239,8 @@
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[DEF1:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
- ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[LOAD]](s48), [[DEF1]](s48)
- ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s96)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
; CHECK: $vgpr1 = COPY [[UV1]](s32)
; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -484,24 +484,32 @@
LLT DstTy = MRI.getType(DstRegs[0]);
LLT LCMTy = getLCMType(SrcTy, PartTy);
- const unsigned LCMSize = LCMTy.getSizeInBits();
const unsigned DstSize = DstTy.getSizeInBits();
const unsigned SrcSize = SrcTy.getSizeInBits();
+ unsigned CoveringSize = LCMTy.getSizeInBits();
Register UnmergeSrc = SrcReg;
- if (LCMSize != SrcSize) {
- // Widen to the common type.
- Register Undef = B.buildUndef(SrcTy).getReg(0);
- SmallVector<Register, 8> MergeParts(1, SrcReg);
- for (unsigned Size = SrcSize; Size != LCMSize; Size += SrcSize)
- MergeParts.push_back(Undef);
-
- UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+
+ if (CoveringSize != SrcSize) {
+ // For scalars, it's common to be able to use a simple extension.
+ if (SrcTy.isScalar() && DstTy.isScalar()) {
+ CoveringSize = alignTo(SrcSize, DstSize);
+ LLT CoverTy = LLT::scalar(CoveringSize);
+ UnmergeSrc = B.buildInstr(ExtendOp, {CoverTy}, {SrcReg}).getReg(0);
+ } else {
+ // Widen to the common type.
+ // FIXME: This should respect the extend type
+ Register Undef = B.buildUndef(SrcTy).getReg(0);
+ SmallVector<Register, 8> MergeParts(1, SrcReg);
+ for (unsigned Size = SrcSize; Size != CoveringSize; Size += SrcSize)
+ MergeParts.push_back(Undef);
+ UnmergeSrc = B.buildMerge(LCMTy, MergeParts).getReg(0);
+ }
}
// Unmerge to the original registers and pad with dead defs.
SmallVector<Register, 8> UnmergeResults(DstRegs.begin(), DstRegs.end());
- for (unsigned Size = DstSize * DstRegs.size(); Size != LCMSize;
+ for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize;
Size += DstSize) {
UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy));
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D105815.357941.patch
Type: text/x-patch
Size: 4575 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210712/f24fc93d/attachment.bin>
More information about the llvm-commits
mailing list