[PATCH] D31318: [AMDGPU] Fold V_CNDMASK with identical source operands
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 23 18:25:20 PDT 2017
rampitec created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
Such instructions sometimes appear after lowering and folding.
Repository:
rL LLVM
https://reviews.llvm.org/D31318
Files:
lib/Target/AMDGPU/SIFoldOperands.cpp
test/CodeGen/AMDGPU/fold-cndmask.mir
Index: test/CodeGen/AMDGPU/fold-cndmask.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/fold-cndmask.mir
@@ -0,0 +1,31 @@
+# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+
+# CHECK: %1 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %2 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %4 = COPY %3
+# CHECK: %5 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %6 = V_MOV_B32_e32 0, implicit %exec
+
+
+---
+name: fold_cndmask
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_64 }
+ - { id: 1, class: vgpr_32 }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: vgpr_32 }
+ - { id: 6, class: vgpr_32 }
+body: |
+ bb.0.entry:
+ %0 = IMPLICIT_DEF
+ %1 = V_CNDMASK_B32_e64 0, 0, %0, implicit %exec
+ %2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit %exec
+ %3 = IMPLICIT_DEF
+ %4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit %exec
+ %5 = COPY %1
+ %6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit %exec
+...
+
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -591,6 +591,34 @@
return false;
}
+// Try to fold an instruction into a simpler one
+static bool tryFoldInst(const SIInstrInfo *TII,
+ MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
+ Opc == AMDGPU::V_CNDMASK_B32_e32_si ||
+ Opc == AMDGPU::V_CNDMASK_B32_e32_vi ||
+ Opc == AMDGPU::V_CNDMASK_B32_e64 ||
+ Opc == AMDGPU::V_CNDMASK_B32_e64_si ||
+ Opc == AMDGPU::V_CNDMASK_B32_e64_vi ||
+ Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
+ const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
+ if (Src1->isIdenticalTo(*Src0)) {
+ DEBUG(dbgs() << "Folded " << *MI << " into ");
+ MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
+ MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
+ mutateCopyOp(*MI, TII->get(Src0->isReg() ? AMDGPU::COPY
+ : getMovOpc(false)));
+ DEBUG(dbgs() << *MI << '\n');
+ return true;
+ }
+ }
+
+ return false;
+}
+
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need mutate the operands of new mov instructions to add implicit
@@ -692,6 +720,7 @@
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+ tryFoldInst(TII, Fold.UseMI);
}
}
}
@@ -907,6 +936,8 @@
Next = std::next(I);
MachineInstr &MI = *I;
+ tryFoldInst(TII, &MI);
+
if (!isFoldableCopy(MI)) {
if (IsIEEEMode || !tryFoldOMod(MI))
tryFoldClamp(MI);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D31318.92894.patch
Type: text/x-patch
Size: 3153 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170324/cb500e42/attachment.bin>
More information about the llvm-commits
mailing list