[llvm-commits] [llvm] r112108 - in /llvm/trunk/lib/Target/ARM: ARMExpandPseudoInsts.cpp ARMISelDAGToDAG.cpp ARMInstrFormats.td ARMInstrNEON.td NEONPreAllocPass.cpp

Bob Wilson bob.wilson at apple.com
Wed Aug 25 16:27:42 PDT 2010


Author: bwilson
Date: Wed Aug 25 18:27:42 2010
New Revision: 112108

URL: http://llvm.org/viewvc/llvm-project?rev=112108&view=rev
Log:
Start converting NEON load/stores to use pseudo instructions, beginning here
with the VST4 instructions.  Until after register allocation, we want to
represent sets of adjacent registers by a single super-register.  These
VST4 pseudo instructions have a single QQ or QQQQ source register operand.
They get expanded to the real VST4 instructions with 4 separate D register
operands.  Once this conversion is complete, we'll be able to remove the
NEONPreAllocPass and avoid some fragile and hacky code elsewhere.

Modified:
    llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
    llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
    llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp

Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=112108&r1=112107&r2=112108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Wed Aug 25 18:27:42 2010
@@ -24,6 +24,13 @@
 
 namespace {
   class ARMExpandPseudo : public MachineFunctionPass {
+    // Constants for register spacing in NEON load/store instructions.
+    enum NEONRegSpacing {
+      SingleSpc,
+      EvenDblSpc,
+      OddDblSpc
+    };
+
   public:
     static char ID;
     ARMExpandPseudo() : MachineFunctionPass(ID) {}
@@ -41,6 +48,8 @@
     void TransferImpOps(MachineInstr &OldMI,
                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     bool ExpandMBB(MachineBasicBlock &MBB);
+    void ExpandVST4(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+                    bool hasWriteBack, NEONRegSpacing RegSpc);
   };
   char ARMExpandPseudo::ID = 0;
 }
@@ -63,6 +72,61 @@
   }
 }
 
+/// ExpandVST4 - Replace the VST4 pseudo at MBBI (one QQ/QQQQ source register)
+/// with real instruction Opc, which stores 4 D subregisters picked by RegSpc.
+void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
+                                 unsigned Opc, bool hasWriteBack,
+                                 NEONRegSpacing RegSpc) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+  unsigned OpIdx = 0;  // Index of the next pseudo operand to copy.
+  if (hasWriteBack) {  // Writeback forms lead with a register def operand.
+    bool DstIsDead = MI.getOperand(OpIdx).isDead();
+    unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+    MIB.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead));
+  }
+  // Copy the addrmode6 operands: a register followed by an immediate.
+  bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+  MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+  MIB.addImm(MI.getOperand(OpIdx++).getImm());
+  if (hasWriteBack) {
+    // Copy the am6offset operand, preserving its kill flag.
+    bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+    MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+  }
+
+  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+  unsigned SrcReg = MI.getOperand(OpIdx).getReg();  // QQ/QQQQ super-register.
+  unsigned D0, D1, D2, D3;
+  if (RegSpc == SingleSpc) {  // Consecutive: dsub_0 .. dsub_3.
+    D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+    D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+    D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+    D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+  } else if (RegSpc == EvenDblSpc) {  // Even: dsub_0, 2, 4, 6.
+    D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+    D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+    D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
+    D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
+  } else {  // Odd: dsub_1, 3, 5, 7.
+    assert(RegSpc == OddDblSpc && "unknown register spacing for VST4");
+    D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+    D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+    D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
+    D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
+  } 
+
+  MIB.addReg(D0, getKillRegState(SrcIsKill))  // All four reuse SrcReg's kill flag.
+    .addReg(D1, getKillRegState(SrcIsKill))
+    .addReg(D2, getKillRegState(SrcIsKill))
+    .addReg(D3, getKillRegState(SrcIsKill));
+  MIB = AddDefaultPred(MIB);
+  TransferImpOps(MI, MIB, MIB);
+  MI.eraseFromParent();  // Remove the pseudo; the instruction built above replaces it.
+}
+
 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   bool Modified = false;
 
@@ -71,9 +135,13 @@
     MachineInstr &MI = *MBBI;
     MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
 
+    bool ModifiedOp = true;
     unsigned Opcode = MI.getOpcode();
     switch (Opcode) {
-    default: break;
+    default:
+      ModifiedOp = false;
+      break;
+
     case ARM::tLDRpci_pic: 
     case ARM::t2LDRpci_pic: {
       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -92,7 +160,6 @@
         .addOperand(MI.getOperand(2));
       TransferImpOps(MI, MIB1, MIB2);
       MI.eraseFromParent();
-      Modified = true;
       break;
     }
 
@@ -128,7 +195,6 @@
       HI16.addImm(Pred).addReg(PredReg);
       TransferImpOps(MI, LO16, HI16);
       MI.eraseFromParent();
-      Modified = true;
       break;
     }
 
@@ -155,9 +221,37 @@
                      .addReg(OddSrc, getKillRegState(SrcIsKill)));
       TransferImpOps(MI, Even, Odd);
       MI.eraseFromParent();
-      Modified = true;
+      break; // MI was erased; must not fall through into the VST4 cases.
     }
+
+    case ARM::VST4d8Pseudo:
+      ExpandVST4(MBBI, ARM::VST4d8, false, SingleSpc); break;
+    case ARM::VST4d16Pseudo:
+      ExpandVST4(MBBI, ARM::VST4d16, false, SingleSpc); break;
+    case ARM::VST4d32Pseudo:
+      ExpandVST4(MBBI, ARM::VST4d32, false, SingleSpc); break;
+    case ARM::VST4d8Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4d8_UPD, true, SingleSpc); break;
+    case ARM::VST4d16Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4d16_UPD, true, SingleSpc); break;
+    case ARM::VST4d32Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4d32_UPD, true, SingleSpc); break;
+    case ARM::VST4q8Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc); break;
+    case ARM::VST4q16Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc); break;
+    case ARM::VST4q32Pseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc); break;
+    case ARM::VST4q8oddPseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q8_UPD, true, OddDblSpc); break;
+    case ARM::VST4q16oddPseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q16_UPD, true, OddDblSpc); break;
+    case ARM::VST4q32oddPseudo_UPD:
+      ExpandVST4(MBBI, ARM::VST4q32_UPD, true, OddDblSpc); break;
     }
+
+    if (ModifiedOp)
+      Modified = true;
     MBBI = NMBBI;
   }
 

Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=112108&r1=112107&r2=112108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Aug 25 18:27:42 2010
@@ -1260,6 +1260,11 @@
   Ops.push_back(MemAddr);
   Ops.push_back(Align);
 
+  // FIXME: This is a temporary flag to distinguish VSTs that have been
+  // converted to pseudo instructions.
+  bool usePseudoInstrs = (NumVecs == 4 &&
+                          VT.getSimpleVT().SimpleTy != MVT::v1i64);
+
   if (is64BitVector) {
     if (NumVecs >= 2) {
       SDValue RegSeq;
@@ -1278,6 +1283,9 @@
           : N->getOperand(3+3);
         RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
       }
+      if (usePseudoInstrs)
+        Ops.push_back(RegSeq);
+      else {
 
       // Now extract the D registers back out.
       Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
@@ -1290,15 +1298,16 @@
       if (NumVecs > 3)
         Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
                                                      RegSeq));
+      }
     } else {
-      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-        Ops.push_back(N->getOperand(Vec+3));
+      Ops.push_back(N->getOperand(3));
     }
     Ops.push_back(Pred);
     Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
     unsigned Opc = DOpcodes[OpcodeIndex];
-    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+                                  usePseudoInstrs ? 6 : NumVecs+5);
   }
 
   EVT RegVT = GetNEONSubregVT(VT);
@@ -1363,6 +1372,9 @@
   // Store the even D registers.
   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
   Ops.push_back(Reg0); // post-access address offset
+  if (usePseudoInstrs)
+    Ops.push_back(RegSeq);
+  else
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
     Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
                                                  RegVT, RegSeq));
@@ -1371,18 +1383,24 @@
   Ops.push_back(Chain);
   unsigned Opc = QOpcodes0[OpcodeIndex];
   SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+6);
+                                        MVT::Other, Ops.data(),
+                                        usePseudoInstrs ? 7 : NumVecs+6);
   Chain = SDValue(VStA, 1);
 
   // Store the odd D registers.
   Ops[0] = SDValue(VStA, 0); // MemAddr
+  if (usePseudoInstrs)
+    Ops[6] = Chain;
+  else {
   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
     Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
                                                 RegVT, RegSeq);
   Ops[NumVecs+5] = Chain;
+  }
   Opc = QOpcodes1[OpcodeIndex];
   SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
-                                        MVT::Other, Ops.data(), NumVecs+6);
+                                        MVT::Other, Ops.data(),
+                                        usePseudoInstrs ? 7 : NumVecs+6);
   Chain = SDValue(VStB, 1);
   ReplaceUses(SDValue(N, 0), Chain);
   return NULL;
@@ -2312,14 +2330,14 @@
     }
 
     case Intrinsic::arm_neon_vst4: {
-      unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
-                              ARM::VST4d32, ARM::VST1d64Q };
-      unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
-                               ARM::VST4q16_UPD,
-                               ARM::VST4q32_UPD };
-      unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
-                               ARM::VST4q16odd_UPD,
-                               ARM::VST4q32odd_UPD };
+      unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+                              ARM::VST4d32Pseudo, ARM::VST1d64Q };
+      unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+                               ARM::VST4q16Pseudo_UPD,
+                               ARM::VST4q32Pseudo_UPD };
+      unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+                               ARM::VST4q16oddPseudo_UPD,
+                               ARM::VST4q32oddPseudo_UPD };
       return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
     }
 

Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=112108&r1=112107&r2=112108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Wed Aug 25 18:27:42 2010
@@ -1534,6 +1534,14 @@
   let Inst{7-4} = op7_4;
 }
 
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+  : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+            itin> { // Base class for NEON load/store pseudo instructions.
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p)); // Inputs plus a predicate.
+  list<Predicate> Predicates = [HasNEON]; // Only selectable with NEON.
+}
+
 class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string dt, string asm, string cstr, list<dag> pattern>
   : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=112108&r1=112107&r2=112108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Aug 25 18:27:42 2010
@@ -486,6 +486,19 @@
 
 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQQPseudo
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
+class VSTQQWBPseudo
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+                "$addr.addr = $wb">;
+class VSTQQQQWBPseudo
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+                "$addr.addr = $wb">;
+
 //   VST1     : Vector Store (multiple single elements)
 class VST1D<bits<4> op7_4, string Dt>
   : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
@@ -664,6 +677,10 @@
 def  VST4d16  : VST4D<0b0000, 0b0100, "16">;
 def  VST4d32  : VST4D<0b0000, 0b1000, "32">;
 
+def  VST4d8Pseudo  : VSTQQPseudo;
+def  VST4d16Pseudo : VSTQQPseudo;
+def  VST4d32Pseudo : VSTQQPseudo;
+
 // ...with address register writeback:
 class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
   : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -676,6 +693,10 @@
 def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
 def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
 
+def VST4d8Pseudo_UPD  : VSTQQWBPseudo;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo;
+
 // ...with double-spaced registers (non-updating versions for disassembly only):
 def VST4q8      : VST4D<0b0001, 0b0000, "8">;
 def VST4q16     : VST4D<0b0001, 0b0100, "16">;
@@ -684,10 +705,14 @@
 def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
 def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
 
+def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+
 // ...alternate versions to be allocated odd register numbers:
-def VST4q8odd_UPD  : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
 
 //   VST1LN   : Vector Store (single element from one lane)
 //   FIXME: Not yet implemented.

Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=112108&r1=112107&r2=112108&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original)
+++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed Aug 25 18:27:42 2010
@@ -260,9 +260,6 @@
     Stride = 2;
     return true;
 
-  case ARM::VST4d8:
-  case ARM::VST4d16:
-  case ARM::VST4d32:
   case ARM::VST1d64Q:
   case ARM::VST4LNd8:
   case ARM::VST4LNd16:
@@ -271,24 +268,6 @@
     NumRegs = 4;
     return true;
 
-  case ARM::VST4q8_UPD:
-  case ARM::VST4q16_UPD:
-  case ARM::VST4q32_UPD:
-    FirstOpnd = 4;
-    NumRegs = 4;
-    Offset = 0;
-    Stride = 2;
-    return true;
-
-  case ARM::VST4q8odd_UPD:
-  case ARM::VST4q16odd_UPD:
-  case ARM::VST4q32odd_UPD:
-    FirstOpnd = 4;
-    NumRegs = 4;
-    Offset = 1;
-    Stride = 2;
-    return true;
-
   case ARM::VST4LNq16:
   case ARM::VST4LNq32:
     FirstOpnd = 2;





More information about the llvm-commits mailing list