[PATCH] D23402: AMDGPU/SI: Teach SIInstrInfo::FoldImmediate() to fold immediates into copies
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 11 05:08:18 PDT 2016
tstellarAMD created this revision.
tstellarAMD added a reviewer: arsenm.
tstellarAMD added a subscriber: llvm-commits.
Herald added subscribers: kzhuravl, arsenm.
I put this code here, because I want to re-use it in a few other places.
This supersedes some of the immediate folding code we have in SIFoldOperands.
I think the peephole optimizer is probably a better place for folding
immediates into copies, since it does some register coalescing at the same time.
This will also make it easier to transition SIFoldOperands into a smarter pass,
where it looks at all uses of an instruction at once to determine the optimal way to
fold operands. Right now, the pass just considers one operand at a time.
https://reviews.llvm.org/D23402
Files:
lib/Target/AMDGPU/SIInstrFormats.td
lib/Target/AMDGPU/SIInstrInfo.cpp
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1210,14 +1210,42 @@
MI.RemoveOperand(Src0ModIdx);
}
-// TODO: Maybe this should be removed this and custom fold everything in
-// SIFoldOperands?
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))
return false;
unsigned Opc = UseMI.getOpcode();
+ if (Opc == AMDGPU::COPY) {
+ bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+ switch (DefMI.getOpcode()) {
+ default:
+ return false;
+ case AMDGPU::V_MOV_B32_e64:
+ if (hasModifiersSet(DefMI, AMDGPU::OpName::src0_modifiers))
+ return false;
+ case AMDGPU::S_MOV_B64:
+ // TODO: We could fold 64-bit immediates, but this gets complicated
+ // when there are sub-registers.
+ return false;
+
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::S_MOV_B32:
+ break;
+ }
+ unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+ const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
+ assert(ImmOp);
+ // FIXME: We could handle FrameIndex values here.
+ if (!ImmOp->isImm()) {
+ return false;
+ }
+ UseMI.setDesc(get(NewOpc));
+ UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+ UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
+ return true;
+ }
+
if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -293,6 +293,8 @@
let isCodeGenOnly = 0;
let SALU = 1;
let SOP1 = 1;
+
+ let UseNamedOperandTable = 1;
}
class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23402.67675.patch
Type: text/x-patch
Size: 2160 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160811/a17fe6f5/attachment.bin>
More information about the llvm-commits
mailing list