[llvm-commits] [llvm] r99261 - in /llvm/trunk/lib/Target/ARM: ARMBaseInstrInfo.cpp ARMISelDAGToDAG.cpp ARMInstrNEON.td NEONPreAllocPass.cpp
Bob Wilson
bob.wilson at apple.com
Mon Mar 22 22:25:43 PDT 2010
Author: bwilson
Date: Tue Mar 23 00:25:43 2010
New Revision: 99261
URL: http://llvm.org/viewvc/llvm-project?rev=99261&view=rev
Log:
Change VLD1 instructions for loading Q register values to operate on pairs
of D registers. Add a separate VLD1q instruction with a Q register
destination operand for use by loadRegFromStackSlot.
Modified:
llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=99261&r1=99260&r2=99261&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Tue Mar 23 00:25:43 2010
@@ -788,7 +788,7 @@
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
if (Align >= 16
&& (getRegisterInfo().canRealignStack(MF))) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
} else {
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=99261&r1=99260&r2=99261&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Tue Mar 23 00:25:43 2010
@@ -124,10 +124,10 @@
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
SDNode *SelectDYN_ALLOC(SDNode *N);
- /// SelectVLD - Select NEON load intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
@@ -1022,7 +1022,7 @@
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1047,6 +1047,9 @@
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1/VST1");
+ break;
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@@ -1060,15 +1063,15 @@
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VLD2,
- // loading 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VLD1 and VLD2,
+ // loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(4, VT);
+ std::vector<EVT> ResTys(2 * NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 4);
+ Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
@@ -1831,9 +1834,17 @@
default:
break;
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64 };
+ return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vld2: {
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD2d64 };
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=99261&r1=99260&r2=99261&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Tue Mar 23 00:25:43 2010
@@ -116,6 +116,7 @@
//===----------------------------------------------------------------------===//
// Use vldmia to load a Q register as a D register pair.
+// This is equivalent to VLDMD except that it has a Q register operand.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
"vldmia", "$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
@@ -126,6 +127,19 @@
let Inst{11-8} = 0b1011;
}
+let mayLoad = 1 in {
+// Use vld1 to load a Q register as a D register pair.
+// This alternative to VLDRQ allows an alignment to be specified.
+// This is equivalent to VLD1q64 except that it has a Q register operand.
+def VLD1q
+ : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
+ IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
+def VLD1q_UPD
+ : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
+ "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+} // mayLoad = 1
+
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
"vstmia", "$addr, ${src:dregpair}",
@@ -137,29 +151,27 @@
let Inst{11-8} = 0b1011;
}
-// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "\\{$dst\\}, $addr", "",
- [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "${dst:dregpair}, $addr", "",
- [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
-
-def VLD1d8 : VLD1D<0b0000, "8", v8i8>;
-def VLD1d16 : VLD1D<0b0100, "16", v4i16>;
-def VLD1d32 : VLD1D<0b1000, "32", v2i32>;
-def VLD1df : VLD1D<0b1000, "32", v2f32>;
-def VLD1d64 : VLD1D<0b1100, "64", v1i64>;
-
-def VLD1q8 : VLD1Q<0b0000, "8", v16i8>;
-def VLD1q16 : VLD1Q<0b0100, "16", v8i16>;
-def VLD1q32 : VLD1Q<0b1000, "32", v4i32>;
-def VLD1qf : VLD1Q<0b1000, "32", v4f32>;
-def VLD1q64 : VLD1Q<0b1100, "64", v2i64>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-let mayLoad = 1 in {
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+
+def VLD1d8 : VLD1D<0b0000, "8">;
+def VLD1d16 : VLD1D<0b0100, "16">;
+def VLD1d32 : VLD1D<0b1000, "32">;
+def VLD1d64 : VLD1D<0b1100, "64">;
+
+def VLD1q8 : VLD1Q<0b0000, "8">;
+def VLD1q16 : VLD1Q<0b0100, "16">;
+def VLD1q32 : VLD1Q<0b1000, "32">;
+def VLD1q64 : VLD1Q<0b1100, "64">;
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
@@ -182,9 +194,6 @@
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
-} // mayLoad = 1
-
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
@@ -242,9 +251,6 @@
def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
-def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>;
def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
@@ -266,11 +272,6 @@
def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
-def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=99261&r1=99260&r2=99261&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original)
+++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Tue Mar 23 00:25:43 2010
@@ -46,10 +46,17 @@
default:
break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ FirstOpnd = 0;
+ NumRegs = 2;
+ return true;
+
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
- case ARM::VLD2d64:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
More information about the llvm-commits
mailing list