[llvm] c28f79a - [AMDGPU] SIFoldOperands: try harder to fold cndmask instructions
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 8 06:26:18 PDT 2021
Author: Jay Foad
Date: 2021-04-08T14:26:12+01:00
New Revision: c28f79a0e3352a972026f6dc896d13f81835b468
URL: https://github.com/llvm/llvm-project/commit/c28f79a0e3352a972026f6dc896d13f81835b468
DIFF: https://github.com/llvm/llvm-project/commit/c28f79a0e3352a972026f6dc896d13f81835b468.diff
LOG: [AMDGPU] SIFoldOperands: try harder to fold cndmask instructions
Look through copies to find more cases where the two values being
selected are identical. The motivation for this is just to be able to
remove the weird special case where tryFoldCndMask was called from
foldInstOperand, part way through folding a move-immediate into its
users, without regressing any lit tests.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d27936599d8f..6de34dcf3a27 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1155,8 +1155,12 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src1->isIdenticalTo(*Src0))
- return false;
+ if (!Src1->isIdenticalTo(*Src0)) {
+ auto *Src0Imm = getImmOrMaterializedImm(*MRI, *Src0);
+ auto *Src1Imm = getImmOrMaterializedImm(*MRI, *Src1);
+ if (!Src1Imm->isIdenticalTo(*Src0Imm))
+ return false;
+ }
int Src1ModIdx =
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
@@ -1276,11 +1280,8 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
for (MachineInstr *Copy : CopiesToReplace)
Copy->addImplicitDefUseOperands(*MF);
- SmallPtrSet<MachineInstr *, 16> Folded;
for (FoldCandidate &Fold : FoldList) {
assert(!Fold.isReg() || Fold.OpToFold);
- if (Folded.count(Fold.UseMI))
- continue;
if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
Register Reg = Fold.OpToFold->getReg();
MachineInstr *DefMI = Fold.OpToFold->getParent();
@@ -1300,8 +1301,6 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
<< static_cast<int>(Fold.UseOpNo) << " of "
<< *Fold.UseMI);
- if (tryFoldCndMask(*Fold.UseMI))
- Folded.insert(Fold.UseMI);
} else if (Fold.isCommuted()) {
// Restoring instruction's original operand order if fold has failed.
TII->commuteInstruction(*Fold.UseMI, false);
diff --git a/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir b/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
index a921ce33682c..c22e0259a892 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir
@@ -11,7 +11,7 @@ body: |
; CHECK: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+ ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%0:sreg_32_xm0_xexec = IMPLICIT_DEF
%1:sreg_32 = S_MOV_B32 0
%2:vgpr_32 = COPY %1:sreg_32
More information about the llvm-commits
mailing list