/// Determines the optimal series of memory ops to replace the memset / memcpy.
/// Return true if the number of memory ops is below the threshold (Limit).
/// It returns the types of the sequence of memory ops to perform
/// memset / memcpy by reference.
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                     unsigned Limit, uint64_t Size,
                                     unsigned DstAlign, unsigned SrcAlign,
                                     bool IsMemset, bool ZeroMemset,
                                     bool MemcpyStrSrc, bool AllowOverlap,
                                     unsigned DstAS, unsigned SrcAS,
                                     SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
         "Expecting memcpy / memset source to meet alignment requirement!");
  // If 'SrcAlign' is zero, that means the memory operation does not need to
  // load the value, i.e. memset or memcpy from constant string. Otherwise,
  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
  // is the specified alignment of the memory operation. If it is zero, that
  // means it's possible to change the alignment of the destination.
  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
  // not need to be loaded.
  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, IsMemset,
                                   ZeroMemset, MemcpyStrSrc,
                                   DAG.getMachineFunction());

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    if (DstAlign == 0)
      DstAlign = SrcAlign;
    VT = MVT::i64;
    while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
           !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
      VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!TLI.isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  while (Size != 0) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            TLI.isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 TLI.isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      // FIXME: Only does this for 64-bit or more since we don't have proper
      // cost model for unaligned load / store.
      bool Fast;
      if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size &&
          TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
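
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the implementation above): the core of
// FindOptimalMemOpLowering is a greedy covering loop -- use the widest store
// type that still fits the remaining size, fall back to narrower types for
// the tail, and bail out once the op count exceeds the limit. The standalone
// helper below mirrors that shape with plain power-of-two byte widths instead
// of EVTs, and it omits the target hooks and the overlapping-store
// optimization. All names in this sketch are hypothetical and exist only for
// illustration.

#include <cstdint>
#include <vector>

static bool findGreedyMemOpWidths(std::vector<unsigned> &Widths,
                                  unsigned Limit, uint64_t Size,
                                  unsigned MaxWidth) {
  // MaxWidth is assumed to be a power of two (e.g. 16 bytes for a 128-bit
  // vector store), so repeatedly halving it reaches 1 before reaching 0.
  unsigned NumOps = 0;
  unsigned Width = MaxWidth;
  while (Size != 0) {
    // Shrink to the widest "type" that still fits the remaining bytes.
    while (Width > Size)
      Width /= 2;
    // Too many ops: the caller would fall back to a library call instead.
    if (++NumOps > Limit)
      return false;
    Widths.push_back(Width);
    Size -= Width;
  }
  return true;
}

// For example, Size = 31 with MaxWidth = 16 produces widths {16, 8, 4, 2, 1}:
// five memory ops, so any Limit below 5 rejects the inline expansion.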