[llvm] r298723 - [AMDGPU] Fold V_CNDMASK with identical source operands

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 24 11:55:21 PDT 2017


Author: rampitec
Date: Fri Mar 24 13:55:20 2017
New Revision: 298723

URL: http://llvm.org/viewvc/llvm-project?rev=298723&view=rev
Log:
[AMDGPU] Fold V_CNDMASK with identical source operands

Such instructions sometimes appear after lowering and folding.

Differential Revision: https://reviews.llvm.org/D31318

Added:
    llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=298723&r1=298722&r2=298723&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Fri Mar 24 13:55:20 2017
@@ -591,6 +591,32 @@ static bool tryConstantFoldOp(MachineReg
   return false;
 }
 
+// Fold a V_CNDMASK whose src0 and src1 are identical into a COPY or a mov.
+static bool tryFoldInst(const SIInstrInfo *TII,
+                        MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+
+  if (Opc == AMDGPU::V_CNDMASK_B32_e32    ||
+      Opc == AMDGPU::V_CNDMASK_B32_e64    ||
+      Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
+    const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+    const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
+    if (Src1->isIdenticalTo(*Src0)) {
+      DEBUG(dbgs() << "Folded " << *MI << " into ");
+      int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+      if (Src2Idx != -1)
+        MI->RemoveOperand(Src2Idx);
+      MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
+      mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
+                                               : getMovOpc(false)));
+      DEBUG(dbgs() << *MI << '\n');
+      return true;
+    }
+  }
+
+  return false;
+}
+
 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                      MachineOperand &OpToFold) const {
   // We need mutate the operands of new mov instructions to add implicit
@@ -692,6 +718,7 @@ void SIFoldOperands::foldInstOperand(Mac
       }
       DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
             static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+      tryFoldInst(TII, Fold.UseMI);
     }
   }
 }
@@ -907,6 +934,8 @@ bool SIFoldOperands::runOnMachineFunctio
       Next = std::next(I);
       MachineInstr &MI = *I;
 
+      tryFoldInst(TII, &MI);
+
       if (!isFoldableCopy(MI)) {
         if (IsIEEEMode || !tryFoldOMod(MI))
           tryFoldClamp(MI);

Added: llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir?rev=298723&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fold-cndmask.mir Fri Mar 24 13:55:20 2017
@@ -0,0 +1,34 @@
+# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+
+# CHECK: %1 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %2 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %4 = COPY %3
+# CHECK: %5 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %6 = V_MOV_B32_e32 0, implicit %exec
+# CHECK: %7 = COPY %3
+
+---
+name:            fold_cndmask
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: vgpr_32 }
+  - { id: 5, class: vgpr_32 }
+  - { id: 6, class: vgpr_32 }
+  - { id: 7, class: vgpr_32 }
+body:             |
+  bb.0.entry:
+    %0 = IMPLICIT_DEF
+    %1 = V_CNDMASK_B32_e64 0, 0, %0, implicit %exec
+    %2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit %exec
+    %3 = IMPLICIT_DEF
+    %4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit %exec
+    %5 = COPY %1
+    %6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit %exec
+    %vcc = IMPLICIT_DEF
+    %7 = V_CNDMASK_B32_e32 %3, %3, implicit %exec, implicit %vcc
+
+...




More information about the llvm-commits mailing list