[llvm-commits] [llvm] r139995 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/trunc-ext-ld-st.ll

Tobias Grosser tobias at grosser.es
Sun Sep 18 10:36:01 PDT 2011


On 09/18/2011 11:39 AM, Nadav Rotem wrote:
> Author: nadav
> Date: Sun Sep 18 05:39:32 2011
> New Revision: 139995
>
> URL: http://llvm.org/viewvc/llvm-project?rev=139995&view=rev
> Log:
> When promoting integer vectors we often create ext-loads. This patch adds a
> dag-combine optimization to implement the ext-load efficiently (using shuffles).
>
> For example, the type <4 x i8> is stored in memory as i32, but it needs to
> find its way into a <4 x i32> register. Previously we scalarized the memory
> access; now we use shuffles.

I found a typo and started nitpicking. ;-) See inline.

>
>
> Added:
>      llvm/trunk/test/CodeGen/X86/trunc-ext-ld-st.ll
> Modified:
>      llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=139995&r1=139994&r2=139995&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 18 05:39:32 2011
> @@ -1138,6 +1138,7 @@
>     setTargetDAGCombine(ISD::AND);
>     setTargetDAGCombine(ISD::ADD);
>     setTargetDAGCombine(ISD::SUB);
> +  setTargetDAGCombine(ISD::LOAD);
>     setTargetDAGCombine(ISD::STORE);
>     setTargetDAGCombine(ISD::ZERO_EXTEND);
>     setTargetDAGCombine(ISD::SINT_TO_FP);
> @@ -13433,6 +13434,89 @@
>     return SDValue();
>   }
>
> +/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
> +static SDValue PerformLOADCombine(SDNode *N, SelectionDAG&DAG,
> +                                   const X86Subtarget *Subtarget) {
> +  LoadSDNode *Ld = cast<LoadSDNode>(N);
> +  EVT RegVT = Ld->getValueType(0);
> +  EVT MemVT = Ld->getMemoryVT();
> +  DebugLoc dl = Ld->getDebugLoc();
> +  const TargetLowering&TLI = DAG.getTargetLoweringInfo();
> +
> +  ISD::LoadExtType Ext = Ld->getExtensionType();
> +
> +  // If yhis is a vector EXT Load then attempt to optimize it using a
            ^^^^
            this
> +  // shuffle. We need SSE4 for the shuffles.
> +  // TODO: It is possible to support ZExt by zeroing the undef values
> +  // during the shuffle phase or after the shuffle.
> +  if (RegVT.isVector()&&  Ext == ISD::EXTLOAD&&  Subtarget->hasSSE41()) {
> +    assert(MemVT != RegVT&&  "Cannot extend to the same type");
> +    assert(MemVT.isVector()&&  "Must load a vector from memory");
> +
> +    unsigned NumElems = RegVT.getVectorNumElements();
> +    unsigned RegSz = RegVT.getSizeInBits();
> +    unsigned MemSz = MemVT.getSizeInBits();
> +    assert(RegSz>  MemSz&&  "Register size must be greater than the mem size");
> +    // All sized must be a power of two
               ^^^^^
               sizes

> +    if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue();
> +
> +    // Attempt to load the original value using a single load op.
> +    // Find a scalar type which is equal to the loaded word size.
> +    MVT SclrLoadTy = MVT::i8;
> +    for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
> +         tp<  MVT::LAST_INTEGER_VALUETYPE; ++tp) {
> +      MVT Tp = (MVT::SimpleValueType)tp;
> +      if (TLI.isTypeLegal(Tp)&&   Tp.getSizeInBits() == MemSz) {
> +        SclrLoadTy = Tp;
> +        break;
> +      }
> +    }
> +
> +    // Proceed if a load word is found.
> +    if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue();
> +
> +    EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy,
> +      RegSz/SclrLoadTy.getSizeInBits());
> +
> +    EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
> +                                  RegSz/MemVT.getScalarType().getSizeInBits());
> +    // Can't shuffle using an illegal type.
> +    if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
> +
> +    // Perform a single load.
> +    SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
> +                                  Ld->getBasePtr(),
> +                                  Ld->getPointerInfo(), Ld->isVolatile(),
> +                                  Ld->isNonTemporal(), Ld->getAlignment());
> +
> +    // Insert the word loaded into a vector.
> +    SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
> +      LoadUnitVecVT, ScalarLoad);
> +
> +    // Bitcast the loaded value to a vector of the original element type, in
> +    // the size of the target vector type.
> +    SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector);
> +    unsigned SizeRatio = RegSz/MemSz;
> +
> +    // Redistribute the loaded elements into the different locations.
> +    SmallVector<int, 8>  ShuffleVec(NumElems * SizeRatio, -1);
> +    for (unsigned i = 0; i<  NumElems; i++) ShuffleVec[i*SizeRatio] = i;
> +
> +    SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
> +                                DAG.getUNDEF(SlicedVec.getValueType()),
> +                                ShuffleVec.data());
> +
> +    // Bitcast to the requested type.
> +    Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
> +    // Replace the original load with the new sequence
> +    // and return the new chain.
You could write this in one line.

Cheers
Tobi



More information about the llvm-commits mailing list