[llvm-commits] [llvm] r86404 - in /llvm/trunk/lib/Target/ARM: ARMAddressingModes.h ARMISelDAGToDAG.cpp ARMInstrInfo.td AsmPrinter/ARMAsmPrinter.cpp NEONPreAllocPass.cpp

Wed Nov 11 18:50:39 PST 2009

On Nov 11, 2009, at 8:40 AM, Jim Grosbach wrote:

> 
> On Nov 10, 2009, at 1:27 AM, Evan Cheng wrote:
> 
>> 
>> On Nov 7, 2009, at 1:25 PM, Jim Grosbach wrote:
>> 
>>> 
>>> 
>>> bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
>>>                                     SDValue &Addr, SDValue &Update,
>>> -                                      SDValue &Opc) {
>>> +                                      SDValue &Opc, SDValue &Align) {
>>> Addr = N;
>>> // Default to no writeback.
>>> Update = CurDAG->getRegister(0, MVT::i32);
>>> Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
>>> +  // Default to no alignment.
>>> +  Align = CurDAG->getTargetConstant(0, MVT::i32);
>>> return true;
>>> }
>> 
>> Shouldn't we be able to transfer the alignment on the LoadSDNode / StoreSDNode to Align (capped at 64 / 128 for 64-bit / 128-bit memory operations)?
> 
> Potentially. I'm concerned about stack objects in frames we don't dynamically align, however. That could result in aligned loads/stores of objects without guaranteed alignment, and I wanted to avoid that.

That shouldn't happen. The alignment fields on LoadSDNode and StoreSDNode should not be considered optional. They are required. Dan, is that right?

Evan

> 
> I've been wondering about the best approach for that. We could do the alignment anyway and consider the additional dynamic alignment situations (VLAs and such) to be optimization opportunities. Or we could let the user specify alignment as an optional operand to the NEON builtins. That doesn't help with any auto-vectorizing stuff we may do, however. I tend to prefer the first option. What do you think?
> 
> 
>> 
>> Evan
>> 
>>> 
>>> @@ -1008,8 +1010,8 @@
>>> SDNode *N = Op.getNode();
>>> DebugLoc dl = N->getDebugLoc();
>>> 
>>> -  SDValue MemAddr, MemUpdate, MemOpc;
>>> -  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
>>> +  SDValue MemAddr, MemUpdate, MemOpc, Align;
>>> +  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
>>>   return NULL;
>>> 
>>> SDValue Chain = N->getOperand(0);
>>> @@ -1034,10 +1036,10 @@
>>> 
>>> if (is64BitVector) {
>>>   unsigned Opc = DOpcodes[OpcodeIndex];
>>> -    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
>>> +    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
>>>   std::vector<EVT> ResTys(NumVecs, VT);
>>>   ResTys.push_back(MVT::Other);
>>> -    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
>>> +    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
>>> }
>>> 
>>> EVT RegVT = GetNEONSubregVT(VT);
>>> @@ -1045,10 +1047,10 @@
>>>   // Quad registers are directly supported for VLD2,
>>>   // loading 2 pairs of D regs.
>>>   unsigned Opc = QOpcodes0[OpcodeIndex];
>>> -    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
>>> +    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
>>>   std::vector<EVT> ResTys(4, VT);
>>>   ResTys.push_back(MVT::Other);
>>> -    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
>>> +    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
>>>   Chain = SDValue(VLd, 4);
>>> 
>>>   // Combine the even and odd subregs to produce the result.
>>> @@ -1069,14 +1071,15 @@
>>> 
>>>   // Load the even subregs.
>>>   unsigned Opc = QOpcodes0[OpcodeIndex];
>>> -    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
>>> -    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
>>> +    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
>>> +    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
>>>   Chain = SDValue(VLdA, NumVecs+1);
>>> 
>>>   // Load the odd subregs.
>>>   Opc = QOpcodes1[OpcodeIndex];
>>> -    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
>>> -    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
>>> +    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
>>> +                             Align, Chain };
>>> +    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
>>>   Chain = SDValue(VLdB, NumVecs+1);
>>> 
>>>   // Combine the even and odd subregs to produce the result.
>>> @@ -1096,8 +1099,8 @@
>>> SDNode *N = Op.getNode();
>>> DebugLoc dl = N->getDebugLoc();
>>> 
>>> -  SDValue MemAddr, MemUpdate, MemOpc;
>>> -  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
>>> +  SDValue MemAddr, MemUpdate, MemOpc, Align;
>>> +  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
>>>   return NULL;
>>> 
>>> SDValue Chain = N->getOperand(0);
>>> @@ -1124,13 +1127,14 @@
>>> Ops.push_back(MemAddr);
>>> Ops.push_back(MemUpdate);
>>> Ops.push_back(MemOpc);
>>> +  Ops.push_back(Align);
>>> 
>>> if (is64BitVector) {
>>>   unsigned Opc = DOpcodes[OpcodeIndex];
>>>   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
>>>     Ops.push_back(N->getOperand(Vec+3));
>>>   Ops.push_back(Chain);
>>> -    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
>>> +    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
>>> }
>>> 
>>> EVT RegVT = GetNEONSubregVT(VT);
>>> @@ -1145,7 +1149,7 @@
>>>                                                  N->getOperand(Vec+3)));
>>>   }
>>>   Ops.push_back(Chain);
>>> -    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
>>> +    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
>>> }
>>> 
>>> // Otherwise, quad registers are stored with two separate instructions,
>>> @@ -1161,18 +1165,18 @@
>>> Ops.push_back(Chain);
>>> unsigned Opc = QOpcodes0[OpcodeIndex];
>>> SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
>>> -                                        MVT::Other, Ops.data(), NumVecs+4);
>>> +                                        MVT::Other, Ops.data(), NumVecs+5);
>>> Chain = SDValue(VStA, 1);
>>> 
>>> // Store the odd subregs.
>>> Ops[0] = SDValue(VStA, 0); // MemAddr
>>> for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
>>> -    Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
>>> +    Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
>>>                                               N->getOperand(Vec+3));
>>> -  Ops[NumVecs+3] = Chain;
>>> +  Ops[NumVecs+4] = Chain;
>>> Opc = QOpcodes1[OpcodeIndex];
>>> SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
>>> -                                        MVT::Other, Ops.data(), NumVecs+4);
>>> +                                        MVT::Other, Ops.data(), NumVecs+5);
>>> Chain = SDValue(VStB, 1);
>>> ReplaceUses(SDValue(N, 0), Chain);
>>> return NULL;
>>> @@ -1186,8 +1190,8 @@
>>> SDNode *N = Op.getNode();
>>> DebugLoc dl = N->getDebugLoc();
>>> 
>>> -  SDValue MemAddr, MemUpdate, MemOpc;
>>> -  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
>>> +  SDValue MemAddr, MemUpdate, MemOpc, Align;
>>> +  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
>>>   return NULL;
>>> 
>>> SDValue Chain = N->getOperand(0);
>>> @@ -1224,6 +1228,7 @@
>>> Ops.push_back(MemAddr);
>>> Ops.push_back(MemUpdate);
>>> Ops.push_back(MemOpc);
>>> +  Ops.push_back(Align);
>>> 
>>> unsigned Opc = 0;
>>> if (is64BitVector) {
>>> 
>>> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=86404&r1=86403&r2=86404&view=diff
>>> 
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
>>> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sat Nov  7 15:25:39 2009
>>> @@ -340,9 +340,9 @@
>>> // addrmode6 := reg with optional writeback
>>> //
>>> def addrmode6 : Operand<i32>,
>>> -                ComplexPattern<i32, 3, "SelectAddrMode6", []> {
>>> +                ComplexPattern<i32, 4, "SelectAddrMode6", []> {
>>> let PrintMethod = "printAddrMode6Operand";
>>> -  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
>>> +  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
>>> }
>>> 
>>> // addrmodepc := pc + reg
>>> 
>>> Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=86404&r1=86403&r2=86404&view=diff
>>> 
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original)
>>> +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Sat Nov  7 15:25:39 2009
>>> @@ -638,9 +638,17 @@
>>> const MachineOperand &MO1 = MI->getOperand(Op);
>>> const MachineOperand &MO2 = MI->getOperand(Op+1);
>>> const MachineOperand &MO3 = MI->getOperand(Op+2);
>>> +  const MachineOperand &MO4 = MI->getOperand(Op+3);
>>> 
>>> -  // FIXME: No support yet for specifying alignment.
>>> -  O << "[" << getRegisterName(MO1.getReg()) << "]";
>>> +  O << "[" << getRegisterName(MO1.getReg());
>>> +  if (MO4.getImm()) {
>>> +    if (Subtarget->isTargetDarwin())
>>> +      O << ", :";
>>> +    else
>>> +      O << " @";
>>> +    O << MO4.getImm();
>>> +  }
>>> +  O << "]";
>>> 
>>> if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
>>>   if (MO2.getReg() == 0)
>>> 
>>> Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=86404&r1=86403&r2=86404&view=diff
>>> 
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original)
>>> +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Sat Nov  7 15:25:39 2009
>>> @@ -177,20 +177,20 @@
>>> case ARM::VST2LNd8:
>>> case ARM::VST2LNd16:
>>> case ARM::VST2LNd32:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 2;
>>>   return true;
>>> 
>>> case ARM::VST2q8:
>>> case ARM::VST2q16:
>>> case ARM::VST2q32:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 4;
>>>   return true;
>>> 
>>> case ARM::VST2LNq16a:
>>> case ARM::VST2LNq32a:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 2;
>>>   Offset = 0;
>>>   Stride = 2;
>>> @@ -198,7 +198,7 @@
>>> 
>>> case ARM::VST2LNq16b:
>>> case ARM::VST2LNq32b:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 2;
>>>   Offset = 1;
>>>   Stride = 2;
>>> @@ -211,14 +211,14 @@
>>> case ARM::VST3LNd8:
>>> case ARM::VST3LNd16:
>>> case ARM::VST3LNd32:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 3;
>>>   return true;
>>> 
>>> case ARM::VST3q8a:
>>> case ARM::VST3q16a:
>>> case ARM::VST3q32a:
>>> -    FirstOpnd = 4;
>>> +    FirstOpnd = 5;
>>>   NumRegs = 3;
>>>   Offset = 0;
>>>   Stride = 2;
>>> @@ -227,7 +227,7 @@
>>> case ARM::VST3q8b:
>>> case ARM::VST3q16b:
>>> case ARM::VST3q32b:
>>> -    FirstOpnd = 4;
>>> +    FirstOpnd = 5;
>>>   NumRegs = 3;
>>>   Offset = 1;
>>>   Stride = 2;
>>> @@ -235,7 +235,7 @@
>>> 
>>> case ARM::VST3LNq16a:
>>> case ARM::VST3LNq32a:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 3;
>>>   Offset = 0;
>>>   Stride = 2;
>>> @@ -243,7 +243,7 @@
>>> 
>>> case ARM::VST3LNq16b:
>>> case ARM::VST3LNq32b:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 3;
>>>   Offset = 1;
>>>   Stride = 2;
>>> @@ -256,14 +256,14 @@
>>> case ARM::VST4LNd8:
>>> case ARM::VST4LNd16:
>>> case ARM::VST4LNd32:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 4;
>>>   return true;
>>> 
>>> case ARM::VST4q8a:
>>> case ARM::VST4q16a:
>>> case ARM::VST4q32a:
>>> -    FirstOpnd = 4;
>>> +    FirstOpnd = 5;
>>>   NumRegs = 4;
>>>   Offset = 0;
>>>   Stride = 2;
>>> @@ -272,7 +272,7 @@
>>> case ARM::VST4q8b:
>>> case ARM::VST4q16b:
>>> case ARM::VST4q32b:
>>> -    FirstOpnd = 4;
>>> +    FirstOpnd = 5;
>>>   NumRegs = 4;
>>>   Offset = 1;
>>>   Stride = 2;
>>> @@ -280,7 +280,7 @@
>>> 
>>> case ARM::VST4LNq16a:
>>> case ARM::VST4LNq32a:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 4;
>>>   Offset = 0;
>>>   Stride = 2;
>>> @@ -288,7 +288,7 @@
>>> 
>>> case ARM::VST4LNq16b:
>>> case ARM::VST4LNq32b:
>>> -    FirstOpnd = 3;
>>> +    FirstOpnd = 4;
>>>   NumRegs = 4;
>>>   Offset = 1;
>>>   Stride = 2;
>>> 
>>> 
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
>