[llvm-commits] [llvm] r113812 - in /llvm/trunk/lib/Target/ARM: ARMExpandPseudoInsts.cpp ARMISelDAGToDAG.cpp ARMInstrNEON.td NEONPreAllocPass.cpp
Bob Wilson
bob.wilson at apple.com
Mon Sep 13 16:01:35 PDT 2010
Author: bwilson
Date: Mon Sep 13 18:01:35 2010
New Revision: 113812
URL: http://llvm.org/viewvc/llvm-project?rev=113812&view=rev
Log:
Switch all the NEON vld-lane and vst-lane instructions over to the new
pseudo-instruction approach. Change ARMExpandPseudoInsts to use a table
to record all the NEON load/store information.
Modified:
llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=113812&r1=113811&r2=113812&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Mon Sep 13 18:01:35 2010
@@ -24,13 +24,6 @@
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
- // Constants for register spacing in NEON load/store instructions.
- enum NEONRegSpacing {
- SingleSpc,
- EvenDblSpc,
- OddDblSpc
- };
-
public:
static char ID;
ARMExpandPseudo() : MachineFunctionPass(ID) {}
@@ -48,10 +41,9 @@
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
- void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
- bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
- void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
- bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI);
+ void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
}
@@ -73,37 +65,289 @@
}
}
+namespace {
+ // Constants for register spacing in NEON load/store instructions.
+ // For quad-register load-lane and store-lane pseudo instructions, the
+ // spacing is initially assumed to be EvenDblSpc, and that is changed to
+ // OddDblSpc depending on the lane number operand.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
+ // Entries for NEON load/store information table. The table is sorted by
+ // PseudoOpc for fast binary-search lookups.
+ struct NEONLdStTableEntry {
+ unsigned PseudoOpc;
+ unsigned RealOpc;
+ bool IsLoad;
+ bool HasWriteBack;
+ NEONRegSpacing RegSpacing;
+ unsigned char NumRegs; // D registers loaded or stored
+ unsigned char RegElts; // elements per D register; used for lane ops
+
+ // Comparison methods for binary search of the table.
+ bool operator<(const NEONLdStTableEntry &TE) const {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
+ return TE.PseudoOpc < PseudoOpc;
+ }
+ friend bool ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
+ return PseudoOpc < TE.PseudoOpc;
+ }
+ };
+}
+
+static const NEONLdStTableEntry NEONLdStTable[] = {
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
+{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
+{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
+
+{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
+{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
+{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
+
+{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
+{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
+{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
+{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
+{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
+{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
+{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
+{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
+
+{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
+{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
+{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
+
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
+{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
+{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
+{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
+{ ARM::VST4q8oddPseudo_UPD , ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+};
+
+/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
+/// load or store pseudo instruction.
+static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
+ unsigned NumEntries = array_lengthof(NEONLdStTable);
+
+#ifndef NDEBUG
+ // Make sure the table is sorted.
+ static bool TableChecked = false;
+ if (!TableChecked) {
+ for (unsigned i = 0; i != NumEntries-1; ++i)
+ assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
+ "NEONLdStTable is not sorted!");
+ TableChecked = true;
+ }
+#endif
+
+ const NEONLdStTableEntry *I =
+ std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
+ if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ return I;
+ return NULL;
+}
+
+/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
+/// corresponding to the specified register spacing. Not all of the results
+/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
+static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
+ const TargetRegisterInfo *TRI, unsigned &D0,
+ unsigned &D1, unsigned &D2, unsigned &D3) {
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(Reg, ARM::dsub_0);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_2);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_4);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing");
+ D0 = TRI->getSubReg(Reg, ARM::dsub_1);
+ D1 = TRI->getSubReg(Reg, ARM::dsub_3);
+ D2 = TRI->getSubReg(Reg, ARM::dsub_5);
+ D3 = TRI->getSubReg(Reg, ARM::dsub_7);
+ }
+}
+
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
/// operands to real VLD instructions with D register operands.
-void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool hasWriteBack,
- NEONRegSpacing RegSpc, unsigned NumRegs) {
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
bool DstIsDead = MI.getOperand(OpIdx).isDead();
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
- if (RegSpc == SingleSpc) {
- D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
- D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
- D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
- D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
- } else if (RegSpc == EvenDblSpc) {
- D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
- D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
- D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
- D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
- } else {
- assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
- D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
- D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
- D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
- D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
- }
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 2)
@@ -111,14 +355,14 @@
if (NumRegs > 3)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
- if (hasWriteBack)
+ if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (hasWriteBack)
+ if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
MIB = AddDefaultPred(MIB);
@@ -138,45 +382,32 @@
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
-void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool hasWriteBack,
- NEONRegSpacing RegSpc, unsigned NumRegs) {
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
- if (hasWriteBack)
+ if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (hasWriteBack)
+ if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx).getReg();
unsigned D0, D1, D2, D3;
- if (RegSpc == SingleSpc) {
- D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
- D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
- D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
- D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
- } else if (RegSpc == EvenDblSpc) {
- D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
- D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
- D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
- D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
- } else {
- assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
- D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
- D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
- D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
- D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
- }
-
+ GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0).addReg(D1);
if (NumRegs > 2)
MIB.addReg(D2);
@@ -190,6 +421,85 @@
MI.eraseFromParent();
}
+/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
+/// register operands to real instructions with D register operands.
+void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
+ assert(TableEntry && "NEONLdStTable lookup failed");
+ NEONRegSpacing RegSpc = TableEntry->RegSpacing;
+ unsigned NumRegs = TableEntry->NumRegs;
+ unsigned RegElts = TableEntry->RegElts;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(TableEntry->RealOpc));
+ unsigned OpIdx = 0;
+ // The lane operand is always the 3rd from last operand, before the 2
+ // predicate operands.
+ unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
+
+ // Adjust the lane and spacing as needed for Q registers.
+ assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
+ if (RegSpc == EvenDblSpc && Lane >= RegElts) {
+ RegSpc = OddDblSpc;
+ Lane -= RegElts;
+ }
+ assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
+
+ unsigned DstReg, D0, D1, D2, D3;
+ bool DstIsDead;
+ if (TableEntry->IsLoad) {
+ DstIsDead = MI.getOperand(OpIdx).isDead();
+ DstReg = MI.getOperand(OpIdx++).getReg();
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
+
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Copy the addrmode6 operands.
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.addOperand(MI.getOperand(OpIdx++));
+ // Copy the am6offset operand.
+ if (TableEntry->HasWriteBack)
+ MIB.addOperand(MI.getOperand(OpIdx++));
+
+ // Grab the super-register source.
+ MachineOperand MO = MI.getOperand(OpIdx++);
+ if (!TableEntry->IsLoad)
+ GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
+
+ // Add the subregs as sources of the new instruction.
+ unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
+ getKillRegState(MO.isKill()));
+ MIB.addReg(D0, SrcFlags).addReg(D1, SrcFlags);
+ if (NumRegs > 2)
+ MIB.addReg(D2, SrcFlags);
+ if (NumRegs > 3)
+ MIB.addReg(D3, SrcFlags);
+
+ // Add the lane number operand.
+ MIB.addImm(Lane);
+
+ MIB = AddDefaultPred(MIB);
+ // Copy the super-register source to be an implicit source.
+ MO.setImplicit(true);
+ MIB.addOperand(MO);
+ if (TableEntry->IsLoad)
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -292,204 +602,169 @@
}
case ARM::VLD1q8Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
case ARM::VLD1q16Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
case ARM::VLD1q32Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
case ARM::VLD1q64Pseudo:
- ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
case ARM::VLD1q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
case ARM::VLD1q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
case ARM::VLD1q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
case ARM::VLD1q64Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
-
case ARM::VLD2d8Pseudo:
- ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
case ARM::VLD2d16Pseudo:
- ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
case ARM::VLD2d32Pseudo:
- ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
case ARM::VLD2q8Pseudo:
- ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
case ARM::VLD2q16Pseudo:
- ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
case ARM::VLD2q32Pseudo:
- ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
case ARM::VLD2d8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
case ARM::VLD2d16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
case ARM::VLD2d32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
case ARM::VLD2q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
case ARM::VLD2q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
case ARM::VLD2q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
-
case ARM::VLD3d8Pseudo:
- ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
case ARM::VLD3d16Pseudo:
- ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
case ARM::VLD3d32Pseudo:
- ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
case ARM::VLD1d64TPseudo:
- ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
case ARM::VLD3d8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
case ARM::VLD3d16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
case ARM::VLD3d32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
case ARM::VLD1d64TPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
case ARM::VLD3q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q8oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
case ARM::VLD3q16oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
case ARM::VLD3q32oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
-
case ARM::VLD4d8Pseudo:
- ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
case ARM::VLD4d16Pseudo:
- ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
case ARM::VLD4d32Pseudo:
- ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
case ARM::VLD1d64QPseudo:
- ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
case ARM::VLD4d8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
case ARM::VLD4d16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
case ARM::VLD4d32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
case ARM::VLD1d64QPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
case ARM::VLD4q8Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q16Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q32Pseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q8oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
case ARM::VLD4q16oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
case ARM::VLD4q32oddPseudo_UPD:
- ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+ ExpandVLD(MBBI);
+ break;
case ARM::VST1q8Pseudo:
- ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
case ARM::VST1q16Pseudo:
- ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
case ARM::VST1q32Pseudo:
- ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
case ARM::VST1q64Pseudo:
- ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
case ARM::VST1q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
case ARM::VST1q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
case ARM::VST1q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
case ARM::VST1q64Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
-
case ARM::VST2d8Pseudo:
- ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
case ARM::VST2d16Pseudo:
- ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
case ARM::VST2d32Pseudo:
- ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
case ARM::VST2q8Pseudo:
- ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
case ARM::VST2q16Pseudo:
- ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
case ARM::VST2q32Pseudo:
- ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
case ARM::VST2d8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
case ARM::VST2d16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
case ARM::VST2d32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
case ARM::VST2q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
case ARM::VST2q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
case ARM::VST2q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
-
case ARM::VST3d8Pseudo:
- ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
case ARM::VST3d16Pseudo:
- ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
case ARM::VST3d32Pseudo:
- ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
case ARM::VST1d64TPseudo:
- ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
case ARM::VST3d8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
case ARM::VST3d16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
case ARM::VST3d32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
case ARM::VST1d64TPseudo_UPD:
- ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
case ARM::VST3q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q8oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
case ARM::VST3q16oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
case ARM::VST3q32oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
-
case ARM::VST4d8Pseudo:
- ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
case ARM::VST4d16Pseudo:
- ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
case ARM::VST4d32Pseudo:
- ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
case ARM::VST1d64QPseudo:
- ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
case ARM::VST4d8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
case ARM::VST4d16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
case ARM::VST4d32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
case ARM::VST1d64QPseudo_UPD:
- ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
case ARM::VST4q8Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q16Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q32Pseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q8oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
case ARM::VST4q16oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
case ARM::VST4q32oddPseudo_UPD:
- ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
+ ExpandVST(MBBI);
+ break;
+
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD3LNd8Pseudo:
+ case ARM::VLD3LNd16Pseudo:
+ case ARM::VLD3LNd32Pseudo:
+ case ARM::VLD3LNq16Pseudo:
+ case ARM::VLD3LNq32Pseudo:
+ case ARM::VLD3LNd8Pseudo_UPD:
+ case ARM::VLD3LNd16Pseudo_UPD:
+ case ARM::VLD3LNd32Pseudo_UPD:
+ case ARM::VLD3LNq16Pseudo_UPD:
+ case ARM::VLD3LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ case ARM::VST2LNd8Pseudo:
+ case ARM::VST2LNd16Pseudo:
+ case ARM::VST2LNd32Pseudo:
+ case ARM::VST2LNq16Pseudo:
+ case ARM::VST2LNq32Pseudo:
+ case ARM::VST2LNd8Pseudo_UPD:
+ case ARM::VST2LNd16Pseudo_UPD:
+ case ARM::VST2LNd32Pseudo_UPD:
+ case ARM::VST2LNq16Pseudo_UPD:
+ case ARM::VST2LNq32Pseudo_UPD:
+ case ARM::VST3LNd8Pseudo:
+ case ARM::VST3LNd16Pseudo:
+ case ARM::VST3LNd32Pseudo:
+ case ARM::VST3LNq16Pseudo:
+ case ARM::VST3LNq32Pseudo:
+ case ARM::VST3LNd8Pseudo_UPD:
+ case ARM::VST3LNd16Pseudo_UPD:
+ case ARM::VST3LNd32Pseudo_UPD:
+ case ARM::VST3LNq16Pseudo_UPD:
+ case ARM::VST3LNq32Pseudo_UPD:
+ case ARM::VST4LNd8Pseudo:
+ case ARM::VST4LNd16Pseudo:
+ case ARM::VST4LNd32Pseudo:
+ case ARM::VST4LNq16Pseudo:
+ case ARM::VST4LNq32Pseudo:
+ case ARM::VST4LNd8Pseudo_UPD:
+ case ARM::VST4LNd16Pseudo_UPD:
+ case ARM::VST4LNd32Pseudo_UPD:
+ case ARM::VST4LNq16Pseudo_UPD:
+ case ARM::VST4LNq32Pseudo_UPD:
+ ExpandLaneOp(MBBI);
+ break;
}
if (ModifiedOp)
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=113812&r1=113811&r2=113812&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon Sep 13 18:01:35 2010
@@ -151,10 +151,9 @@
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
- /// load/store of D registers and even subregs and odd subregs of Q registers.
+ /// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1);
+ unsigned *DOpcodes, unsigned *QOpcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -196,10 +195,6 @@
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
-
- // Form sequences of 8 consecutive D registers.
- SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
- SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
}
@@ -1015,39 +1010,6 @@
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
}
-/// OctoDRegs - Form 8 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
- SDValue V2, SDValue V3,
- SDValue V4, SDValue V5,
- SDValue V6, SDValue V7) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
- SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
- SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
- SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
- SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
- SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
- const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
- V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
-}
-
-/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
-/// for a 64-bit subregister of the vector.
-static EVT GetNEONSubregVT(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled NEON type");
- case MVT::v16i8: return MVT::v8i8;
- case MVT::v8i16: return MVT::v4i16;
- case MVT::v4f32: return MVT::v2f32;
- case MVT::v4i32: return MVT::v2i32;
- case MVT::v2i64: return MVT::v1i64;
- }
-}
-
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
@@ -1281,8 +1243,7 @@
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned NumVecs, unsigned *DOpcodes,
- unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ unsigned *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@@ -1296,16 +1257,6 @@
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
bool is64BitVector = VT.is64BitVector();
- // Quad registers are handled by load/store of subregs. Find the subreg info.
- unsigned NumElts = 0;
- bool Even = false;
- EVT RegVT = VT;
- if (!is64BitVector) {
- RegVT = GetNEONSubregVT(VT);
- NumElts = RegVT.getVectorNumElements();
- Even = Lane < NumElts;
- }
-
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld/vst lane type");
@@ -1323,121 +1274,59 @@
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
- unsigned Opc = 0;
- if (is64BitVector) {
- Opc = DOpcodes[OpcodeIndex];
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]); // D- or Q-register opcode table entry
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Check if this is loading the even or odd subreg of a Q register.
- if (Lane < NumElts) {
- Opc = QOpcodes0[OpcodeIndex];
- } else {
- Lane -= NumElts;
- Opc = QOpcodes1[OpcodeIndex];
- }
-
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- }
-
- // Extract the subregs of the input vector.
- unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
- RegSeq));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+ Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- if (is64BitVector) {
- SDValue V0 = SDValue(VLdLn, 0);
- SDValue V1 = SDValue(VLdLn, 1);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = SDValue(VLdLn, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLdLn, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
- } else {
- // For 128-bit vectors, take the 64-bit results of the load and insert
- // them as subregs into the result.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- if (Even) {
- V[i] = SDValue(VLdLn, Vec);
- V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- } else {
- V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- V[i+1] = SDValue(VLdLn, Vec);
- }
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- if (NumVecs == 2)
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
- else
- RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
- }
+ EVT ResTy;
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
+ Ops.data(), 7);
+ SuperReg = SDValue(VLdLn, 0);
+ Chain = SDValue(VLdLn, 1);
+ // Extract the subregisters.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
}
@@ -2119,24 +2008,24 @@
}
case Intrinsic::arm_neon_vld2lane: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
- return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
- return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
- return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
@@ -2180,24 +2069,24 @@
}
case Intrinsic::arm_neon_vst2lane: {
- unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
- unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
- unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
- return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
- unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
- unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
- unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
- return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
- unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
- unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
- unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
- return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
}
}
break;
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=113812&r1=113811&r2=113812&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Mon Sep 13 18:01:35 2010
@@ -445,6 +445,33 @@
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@@ -459,13 +486,16 @@
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2>;
+
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -479,9 +509,16 @@
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
+
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2>;
+
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -494,13 +531,16 @@
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3>;
+
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -517,9 +557,16 @@
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
+
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>;
+
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4,
@@ -533,13 +580,16 @@
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4>;
+
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -556,9 +606,16 @@
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
+
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
+
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
@@ -846,6 +903,30 @@
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@@ -860,13 +941,16 @@
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST>;
+
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -880,9 +964,16 @@
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
+
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
@@ -894,13 +985,16 @@
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST>;
+
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST>;
// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -915,9 +1009,16 @@
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
+
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
@@ -930,13 +1031,16 @@
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST>;
+
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -951,9 +1055,16 @@
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
+
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
+
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=113812&r1=113811&r2=113812&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original)
+++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Mon Sep 13 18:01:35 2010
@@ -51,144 +51,6 @@
default:
break;
- case ARM::VLD2LNd8:
- case ARM::VLD2LNd16:
- case ARM::VLD2LNd32:
- FirstOpnd = 0;
- NumRegs = 2;
- return true;
-
- case ARM::VLD2LNq16:
- case ARM::VLD2LNq32:
- FirstOpnd = 0;
- NumRegs = 2;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD2LNq16odd:
- case ARM::VLD2LNq32odd:
- FirstOpnd = 0;
- NumRegs = 2;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VLD3LNd8:
- case ARM::VLD3LNd16:
- case ARM::VLD3LNd32:
- FirstOpnd = 0;
- NumRegs = 3;
- return true;
-
- case ARM::VLD3LNq16:
- case ARM::VLD3LNq32:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD3LNq16odd:
- case ARM::VLD3LNq32odd:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VLD4LNd8:
- case ARM::VLD4LNd16:
- case ARM::VLD4LNd32:
- FirstOpnd = 0;
- NumRegs = 4;
- return true;
-
- case ARM::VLD4LNq16:
- case ARM::VLD4LNq32:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD4LNq16odd:
- case ARM::VLD4LNq32odd:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VST2LNd8:
- case ARM::VST2LNd16:
- case ARM::VST2LNd32:
- FirstOpnd = 2;
- NumRegs = 2;
- return true;
-
- case ARM::VST2LNq16:
- case ARM::VST2LNq32:
- FirstOpnd = 2;
- NumRegs = 2;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST2LNq16odd:
- case ARM::VST2LNq32odd:
- FirstOpnd = 2;
- NumRegs = 2;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VST3LNd8:
- case ARM::VST3LNd16:
- case ARM::VST3LNd32:
- FirstOpnd = 2;
- NumRegs = 3;
- return true;
-
- case ARM::VST3LNq16:
- case ARM::VST3LNq32:
- FirstOpnd = 2;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST3LNq16odd:
- case ARM::VST3LNq32odd:
- FirstOpnd = 2;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
- case ARM::VST4LNd8:
- case ARM::VST4LNd16:
- case ARM::VST4LNd32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
- case ARM::VST4LNq16:
- case ARM::VST4LNq32:
- FirstOpnd = 2;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST4LNq16odd:
- case ARM::VST4LNq32odd:
- FirstOpnd = 2;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VTBL2:
FirstOpnd = 1;
NumRegs = 2;
More information about the llvm-commits
mailing list