[llvm-commits] [llvm] r86404 - in /llvm/trunk/lib/Target/ARM: ARMAddressingModes.h ARMISelDAGToDAG.cpp ARMInstrInfo.td AsmPrinter/ARMAsmPrinter.cpp NEONPreAllocPass.cpp

Wed Nov 11 08:40:28 PST 2009

On Nov 10, 2009, at 1:27 AM, Evan Cheng wrote:

>
> On Nov 7, 2009, at 1:25 PM, Jim Grosbach wrote:
>
>>
>>
>> bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
>>                                      SDValue &Addr, SDValue &Update,
>> -                                      SDValue &Opc) {
>> +                                      SDValue &Opc, SDValue  
>> &Align) {
>>  Addr = N;
>>  // Default to no writeback.
>>  Update = CurDAG->getRegister(0, MVT::i32);
>>  Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
>> +  // Default to no alignment.
>> +  Align = CurDAG->getTargetConstant(0, MVT::i32);
>>  return true;
>> }
>
> Shouldn't we be able to transfer the alignment on the LoadSDNode /  
> StoreSDNode to Align (capped at 64 / 128 for 64-bit / 128-bit memory  
> operations)?

Potentially. I'm concerned about stack objects in frames we don't
dynamically align, however. That could result in aligned loads/stores
of objects without guaranteed alignment, and I wanted to avoid that.

I've been wondering about the best approach for that. We could do the  
alignment anyway and consider the additional dynamic alignment  
situations (VLAs and such) to be optimization opportunities. Or we  
could let the user specify alignment as an optional operand to the  
NEON builtins. That doesn't help with any auto-vectorizing stuff we  
may do, however. I tend to prefer the first option. What do you think?

>
> Evan
>
>>
>> @@ -1008,8 +1010,8 @@
>>  SDNode *N = Op.getNode();
>>  DebugLoc dl = N->getDebugLoc();
>>
>> -  SDValue MemAddr, MemUpdate, MemOpc;
>> -  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate,  
>> MemOpc))
>> +  SDValue MemAddr, MemUpdate, MemOpc, Align;
>> +  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate,  
>> MemOpc, Align))
>>    return NULL;
>>
>>  SDValue Chain = N->getOperand(0);
>> @@ -1034,10 +1036,10 @@
>>
>>  if (is64BitVector) {
>>    unsigned Opc = DOpcodes[OpcodeIndex];
>> -    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
>> +    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,  
>> Chain };
>>    std::vector<EVT> ResTys(NumVecs, VT);
>>    ResTys.push_back(MVT::Other);
>> -    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
>> +    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
>>  }
>>
>>  EVT RegVT = GetNEONSubregVT(VT);
>> @@ -1045,10 +1047,10 @@
>>    // Quad registers are directly supported for VLD2,
>>    // loading 2 pairs of D regs.
>>    unsigned Opc = QOpcodes0[OpcodeIndex];
>> -    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
>> +    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align,  
>> Chain };
>>    std::vector<EVT> ResTys(4, VT);
>>    ResTys.push_back(MVT::Other);
>> -    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
>> +    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
>>    Chain = SDValue(VLd, 4);
>>
>>    // Combine the even and odd subregs to produce the result.
>> @@ -1069,14 +1071,15 @@
>>
>>    // Load the even subregs.
>>    unsigned Opc = QOpcodes0[OpcodeIndex];
>> -    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
>> -    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
>> +    const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align,  
>> Chain };
>> +    SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
>>    Chain = SDValue(VLdA, NumVecs+1);
>>
>>    // Load the odd subregs.
>>    Opc = QOpcodes1[OpcodeIndex];
>> -    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate,  
>> MemOpc, Chain };
>> -    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
>> +    const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate,  
>> MemOpc,
>> +                             Align, Chain };
>> +    SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
>>    Chain = SDValue(VLdB, NumVecs+1);
>>
>>    // Combine the even and odd subregs to produce the result.
>> @@ -1096,8 +1099,8 @@
>>  SDNode *N = Op.getNode();
>>  DebugLoc dl = N->getDebugLoc();
>>
>> -  SDValue MemAddr, MemUpdate, MemOpc;
>> -  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate,  
>> MemOpc))
>> +  SDValue MemAddr, MemUpdate, MemOpc, Align;
>> +  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate,  
>> MemOpc, Align))
>>    return NULL;
>>
>>  SDValue Chain = N->getOperand(0);
>> @@ -1124,13 +1127,14 @@
>>  Ops.push_back(MemAddr);
>>  Ops.push_back(MemUpdate);
>>  Ops.push_back(MemOpc);
>> +  Ops.push_back(Align);
>>
>>  if (is64BitVector) {
>>    unsigned Opc = DOpcodes[OpcodeIndex];
>>    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
>>      Ops.push_back(N->getOperand(Vec+3));
>>    Ops.push_back(Chain);
>> -    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),  
>> NumVecs+4);
>> +    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),  
>> NumVecs+5);
>>  }
>>
>>  EVT RegVT = GetNEONSubregVT(VT);
>> @@ -1145,7 +1149,7 @@
>>                                                   N->getOperand(Vec 
>> +3)));
>>    }
>>    Ops.push_back(Chain);
>> -    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),  
>> 8);
>> +    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),  
>> 9);
>>  }
>>
>>  // Otherwise, quad registers are stored with two separate  
>> instructions,
>> @@ -1161,18 +1165,18 @@
>>  Ops.push_back(Chain);
>>  unsigned Opc = QOpcodes0[OpcodeIndex];
>>  SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType 
>> (),
>> -                                        MVT::Other, Ops.data(),  
>> NumVecs+4);
>> +                                        MVT::Other, Ops.data(),  
>> NumVecs+5);
>>  Chain = SDValue(VStA, 1);
>>
>>  // Store the odd subregs.
>>  Ops[0] = SDValue(VStA, 0); // MemAddr
>>  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
>> -    Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1,  
>> dl, RegVT,
>> +    Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1,  
>> dl, RegVT,
>>                                                N->getOperand(Vec+3));
>> -  Ops[NumVecs+3] = Chain;
>> +  Ops[NumVecs+4] = Chain;
>>  Opc = QOpcodes1[OpcodeIndex];
>>  SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType 
>> (),
>> -                                        MVT::Other, Ops.data(),  
>> NumVecs+4);
>> +                                        MVT::Other, Ops.data(),  
>> NumVecs+5);
>>  Chain = SDValue(VStB, 1);
>>  ReplaceUses(SDValue(N, 0), Chain);
>>  return NULL;
>> @@ -1186,8 +1190,8 @@
>>  SDNode *N = Op.getNode();
>>  DebugLoc dl = N->getDebugLoc();
>>
>> -  SDValue MemAddr, MemUpdate, MemOpc;
>> -  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate,  
>> MemOpc))
>> +  SDValue MemAddr, MemUpdate, MemOpc, Align;
>> +  if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate,  
>> MemOpc, Align))
>>    return NULL;
>>
>>  SDValue Chain = N->getOperand(0);
>> @@ -1224,6 +1228,7 @@
>>  Ops.push_back(MemAddr);
>>  Ops.push_back(MemUpdate);
>>  Ops.push_back(MemOpc);
>> +  Ops.push_back(Align);
>>
>>  unsigned Opc = 0;
>>  if (is64BitVector) {
>>
>> Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=86404&r1=86403&r2=86404&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sat Nov  7 15:25:39  
>> 2009
>> @@ -340,9 +340,9 @@
>> // addrmode6 := reg with optional writeback
>> //
>> def addrmode6 : Operand<i32>,
>> -                ComplexPattern<i32, 3, "SelectAddrMode6", []> {
>> +                ComplexPattern<i32, 4, "SelectAddrMode6", []> {
>>  let PrintMethod = "printAddrMode6Operand";
>> -  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
>> +  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
>> }
>>
>> // addrmodepc := pc + reg
>>
>> Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=86404&r1=86403&r2=86404&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original)
>> +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Sat Nov   
>> 7 15:25:39 2009
>> @@ -638,9 +638,17 @@
>>  const MachineOperand &MO1 = MI->getOperand(Op);
>>  const MachineOperand &MO2 = MI->getOperand(Op+1);
>>  const MachineOperand &MO3 = MI->getOperand(Op+2);
>> +  const MachineOperand &MO4 = MI->getOperand(Op+3);
>>
>> -  // FIXME: No support yet for specifying alignment.
>> -  O << "[" << getRegisterName(MO1.getReg()) << "]";
>> +  O << "[" << getRegisterName(MO1.getReg());
>> +  if (MO4.getImm()) {
>> +    if (Subtarget->isTargetDarwin())
>> +      O << ", :";
>> +    else
>> +      O << " @";
>> +    O << MO4.getImm();
>> +  }
>> +  O << "]";
>>
>>  if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
>>    if (MO2.getReg() == 0)
>>
>> Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=86404&r1=86403&r2=86404&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original)
>> +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Sat Nov  7  
>> 15:25:39 2009
>> @@ -177,20 +177,20 @@
>>  case ARM::VST2LNd8:
>>  case ARM::VST2LNd16:
>>  case ARM::VST2LNd32:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 2;
>>    return true;
>>
>>  case ARM::VST2q8:
>>  case ARM::VST2q16:
>>  case ARM::VST2q32:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 4;
>>    return true;
>>
>>  case ARM::VST2LNq16a:
>>  case ARM::VST2LNq32a:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 2;
>>    Offset = 0;
>>    Stride = 2;
>> @@ -198,7 +198,7 @@
>>
>>  case ARM::VST2LNq16b:
>>  case ARM::VST2LNq32b:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 2;
>>    Offset = 1;
>>    Stride = 2;
>> @@ -211,14 +211,14 @@
>>  case ARM::VST3LNd8:
>>  case ARM::VST3LNd16:
>>  case ARM::VST3LNd32:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 3;
>>    return true;
>>
>>  case ARM::VST3q8a:
>>  case ARM::VST3q16a:
>>  case ARM::VST3q32a:
>> -    FirstOpnd = 4;
>> +    FirstOpnd = 5;
>>    NumRegs = 3;
>>    Offset = 0;
>>    Stride = 2;
>> @@ -227,7 +227,7 @@
>>  case ARM::VST3q8b:
>>  case ARM::VST3q16b:
>>  case ARM::VST3q32b:
>> -    FirstOpnd = 4;
>> +    FirstOpnd = 5;
>>    NumRegs = 3;
>>    Offset = 1;
>>    Stride = 2;
>> @@ -235,7 +235,7 @@
>>
>>  case ARM::VST3LNq16a:
>>  case ARM::VST3LNq32a:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 3;
>>    Offset = 0;
>>    Stride = 2;
>> @@ -243,7 +243,7 @@
>>
>>  case ARM::VST3LNq16b:
>>  case ARM::VST3LNq32b:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 3;
>>    Offset = 1;
>>    Stride = 2;
>> @@ -256,14 +256,14 @@
>>  case ARM::VST4LNd8:
>>  case ARM::VST4LNd16:
>>  case ARM::VST4LNd32:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 4;
>>    return true;
>>
>>  case ARM::VST4q8a:
>>  case ARM::VST4q16a:
>>  case ARM::VST4q32a:
>> -    FirstOpnd = 4;
>> +    FirstOpnd = 5;
>>    NumRegs = 4;
>>    Offset = 0;
>>    Stride = 2;
>> @@ -272,7 +272,7 @@
>>  case ARM::VST4q8b:
>>  case ARM::VST4q16b:
>>  case ARM::VST4q32b:
>> -    FirstOpnd = 4;
>> +    FirstOpnd = 5;
>>    NumRegs = 4;
>>    Offset = 1;
>>    Stride = 2;
>> @@ -280,7 +280,7 @@
>>
>>  case ARM::VST4LNq16a:
>>  case ARM::VST4LNq32a:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 4;
>>    Offset = 0;
>>    Stride = 2;
>> @@ -288,7 +288,7 @@
>>
>>  case ARM::VST4LNq16b:
>>  case ARM::VST4LNq32b:
>> -    FirstOpnd = 3;
>> +    FirstOpnd = 4;
>>    NumRegs = 4;
>>    Offset = 1;
>>    Stride = 2;
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>