[PATCH] D100100: [AMDGPU] SIFoldOperands: try harder to fold cndmask instructions
Jay Foad via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 8 06:19:23 PDT 2021
foad created this revision.
foad added reviewers: arsenm, rampitec, hliao.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
foad requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
Look through copies to find more cases where the two values being
selected are identical. The only motivation for this is to be able to
remove the weird special case in which tryFoldCndMask was called from
foldInstOperand, partway through folding a move-immediate into its
users, and to do so without regressing any lit tests.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D100100
Files:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
Index: llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
+++ llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
@@ -11,7 +11,7 @@
; CHECK: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:sreg_32_xm0_xexec = IMPLICIT_DEF
%1:sreg_32 = S_MOV_B32 0
%2:vgpr_32 = COPY %1:sreg_32
Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1155,8 +1155,12 @@
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src1->isIdenticalTo(*Src0))
- return false;
+ if (!Src1->isIdenticalTo(*Src0)) {
+ auto *Src0Imm = getImmOrMaterializedImm(*MRI, *Src0);
+ auto *Src1Imm = getImmOrMaterializedImm(*MRI, *Src1);
+ if (!Src1Imm->isIdenticalTo(*Src0Imm))
+ return false;
+ }
int Src1ModIdx =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
@@ -1276,11 +1280,8 @@
for (MachineInstr *Copy : CopiesToReplace)
Copy->addImplicitDefUseOperands(*MF);
- SmallPtrSet<MachineInstr *, 16> Folded;
for (FoldCandidate &Fold : FoldList) {
assert(!Fold.isReg() || Fold.OpToFold);
- if (Folded.count(Fold.UseMI))
- continue;
if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
Register Reg = Fold.OpToFold->getReg();
MachineInstr *DefMI = Fold.OpToFold->getParent();
@@ -1300,8 +1301,6 @@
LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
<< static_cast<int>(Fold.UseOpNo) << " of "
<< *Fold.UseMI);
- if (tryFoldCndMask(*Fold.UseMI))
- Folded.insert(Fold.UseMI);
} else if (Fold.isCommuted()) {
// Restoring instruction's original operand order if fold has failed.
TII->commuteInstruction(*Fold.UseMI, false);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D100100.336079.patch
Type: text/x-patch
Size: 2372 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210408/f02f3404/attachment-0001.bin>
More information about the llvm-commits mailing list