[llvm] r300878 - ARM: handle post-indexed NEON ops where the offset isn't the access width.

Thu Apr 20 13:48:44 PDT 2017

On 4/20/2017 12:54 PM, Tim Northover via llvm-commits wrote:
> Author: tnorthover
> Date: Thu Apr 20 14:54:02 2017
> New Revision: 300878
>
> URL: http://llvm.org/viewvc/llvm-project?rev=300878&view=rev
> Log:
> ARM: handle post-indexed NEON ops where the offset isn't the access width.
>
> Before, we assumed that any ConstantInt offset was precisely the access width,
> so we could use the "[rN]!" form. ISelLowering only ever created that kind, but
> further simplification during combining could lead to unexpected constants and
> incorrect codegen.
>
> Should fix PR32658.
>
> Modified:
>      llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
>      llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>      llvm/trunk/test/CodeGen/ARM/alloc-no-stack-realign.ll
>      llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
>      llvm/trunk/test/CodeGen/ARM/memset-inline.ll
>      llvm/trunk/test/CodeGen/ARM/vector-load.ll
>      llvm/trunk/test/CodeGen/ARM/vector-store.ll
>      llvm/trunk/test/CodeGen/ARM/vlddup.ll
>      llvm/trunk/test/CodeGen/ARM/vldlane.ll
>      llvm/trunk/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=300878&r1=300877&r2=300878&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu Apr 20 14:54:02 2017
> @@ -1854,6 +1854,14 @@ static unsigned getVLDSTRegisterUpdateOp
>     return Opc; // If not one we handle, return it unchanged.
>   }
>   
> +/// Returns true if the given increment is a Constant known to be equal to the
> +/// access size performed by a NEON load/store. This means the "[rN]!" form can
> +/// be used.
> +static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
> +  auto C = dyn_cast<ConstantSDNode>(Inc);
> +  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
> +}
> +
>   void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
>                                   const uint16_t *DOpcodes,
>                                   const uint16_t *QOpcodes0,
> @@ -1921,13 +1929,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *
>         SDValue Inc = N->getOperand(AddrOpIdx + 1);
>         // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
>         // case entirely when the rest are updated to that form, too.
> -      if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
> +      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
> +      if ((NumVecs <= 2) && !IsImmUpdate)
>           Opc = getVLDSTRegisterUpdateOpcode(Opc);
>         // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
>         // check for that explicitly too. Horribly hacky, but temporary.
> -      if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
> -          !isa<ConstantSDNode>(Inc.getNode()))
> -        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
> +      if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate)
> +        Ops.push_back(IsImmUpdate ? Reg0 : Inc);
>       }
>       Ops.push_back(Pred);
>       Ops.push_back(Reg0);
> @@ -2075,11 +2083,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *
>         SDValue Inc = N->getOperand(AddrOpIdx + 1);
>         // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
>         // case entirely when the rest are updated to that form, too.
> -      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
> +      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
> +      if (NumVecs <= 2 && !IsImmUpdate)
>           Opc = getVLDSTRegisterUpdateOpcode(Opc);
>         // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
>         // check for that explicitly too. Horribly hacky, but temporary.
> -      if  (!isa<ConstantSDNode>(Inc.getNode()))
> +      if  (!IsImmUpdate)
>           Ops.push_back(Inc);
>         else if (NumVecs > 2 && !isVSTfixed(Opc))
>           Ops.push_back(Reg0);
> @@ -2209,7 +2218,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SD
>     Ops.push_back(Align);
>     if (isUpdating) {
>       SDValue Inc = N->getOperand(AddrOpIdx + 1);
> -    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
> +    bool IsImmUpdate =
> +        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
> +    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
>     }
>   
>     SDValue SuperReg;
> @@ -2313,9 +2324,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNod
>       // fixed-stride update instructions don't have an explicit writeback
>       // operand. It's implicit in the opcode itself.
>       SDValue Inc = N->getOperand(2);
> -    if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
> +    bool IsImmUpdate =
> +        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
> +    if (NumVecs <= 2 && !IsImmUpdate)
>         Opc = getVLDSTRegisterUpdateOpcode(Opc);
> -    if (!isa<ConstantSDNode>(Inc.getNode()))
> +    if (!IsImmUpdate)
>         Ops.push_back(Inc);
>       // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
>       else if (NumVecs > 2)
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=300878&r1=300877&r2=300878&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Apr 20 14:54:02 2017
> @@ -10873,11 +10873,8 @@ static SDValue CombineBaseUpdate(SDNode
>   
>       // If the increment is a constant, it must match the memory ref size.
>       SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
> -    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
> -      uint64_t IncVal = CInc->getZExtValue();
> -      if (IncVal != NumBytes)
> -        continue;
> -    } else if (NumBytes >= 3 * 16) {
> +    ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
> +    if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
>         // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
>         // separate instructions that make it harder to use a non-constant update.
>         continue;

We might want to consider keeping this check; even if the transform is 
legal, it's likely not profitable given we have to materialize the 
immediate.

-Eli

-- 
Employee of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project