[llvm] r293627 - [X86][SSE] Add support for combining PINSRW into a target shuffle.
Davide Italiano via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 17 18:51:57 PST 2017
Sorry, Simon, I'm terribly late, but this seems to have caused a new
GCC warning.
When you get a chance, can you please take a look?
[87/122] Building CXX object
lib/Target/X86/CMakeFiles/LLVMX86CodeGen.dir/X86ISelLowering.cpp.o
../lib/Target/X86/X86ISelLowering.cpp: In function ‘bool
getFauxShuffleMask(llvm::SDValue, llvm::SmallVectorImpl<int>&,
llvm::SmallVectorImpl<llvm::SDValue>&)’:
../lib/Target/X86/X86ISelLowering.cpp:5742:35: warning: enumeral and
non-enumeral type in conditional expression [-Wextra]
Mask.push_back(i == InIdx ? SM_SentinelZero : i);
~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~
--
Davide
On Tue, Jan 31, 2017 at 5:51 AM, Simon Pilgrim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: rksimon
> Date: Tue Jan 31 07:51:10 2017
> New Revision: 293627
>
> URL: http://llvm.org/viewvc/llvm-project?rev=293627&view=rev
> Log:
> [X86][SSE] Add support for combining PINSRW into a target shuffle.
>
> Also add the ability to recognise PINSR(Vex, 0, Idx).
>
> Target shuffle combines won't replace multiple insertions with a bit mask until a depth of 3 or more, so we avoid codesize bloat.
>
> The unnecessary vpblendw in clearupper8xi16a will be fixed in an upcoming patch.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=293627&r1=293626&r2=293627&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jan 31 07:51:10 2017
> @@ -5770,12 +5770,21 @@ static bool getFauxShuffleMask(SDValue N
> return true;
> }
> case X86ISD::PINSRW: {
> - // Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern.
> - // TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc.
> SDValue InVec = N.getOperand(0);
> SDValue InScl = N.getOperand(1);
> uint64_t InIdx = N.getConstantOperandVal(2);
> assert(InIdx < NumElts && "Illegal insertion index");
> +
> + // Attempt to recognise a PINSRW(VEC, 0, Idx) shuffle pattern.
> + if (X86::isZeroNode(InScl)) {
> + Ops.push_back(InVec);
> + for (unsigned i = 0; i != NumElts; ++i)
> + Mask.push_back(i == InIdx ? SM_SentinelZero : i);
> + return true;
> + }
> +
> + // Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern.
> + // TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc.
> if (InScl.getOpcode() != ISD::AssertZext ||
> InScl.getOperand(0).getOpcode() != X86ISD::PEXTRW)
> return false;
> @@ -30597,6 +30606,24 @@ static SDValue combineVectorShift(SDNode
> return SDValue();
> }
>
> +static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
> + TargetLowering::DAGCombinerInfo &DCI,
> + const X86Subtarget &Subtarget) {
> + unsigned Opcode = N->getOpcode();
> + assert(((X86ISD::PINSRB == Opcode && N->getValueType(0) ==MVT::v16i8) ||
> + (X86ISD::PINSRW == Opcode && N->getValueType(0) ==MVT::v8i16)) &&
> + "Unexpected vector insertion");
> +
> + // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
> + SDValue Op(N, 0);
> + SmallVector<int, 1> NonceMask; // Just a placeholder.
> + NonceMask.push_back(0);
> + combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
> + /*Depth*/ 1, /*HasVarMask*/ false, DAG,
> + DCI, Subtarget);
> + return SDValue();
> +}
> +
> /// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
> /// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
> /// OR -> CMPNEQSS.
> @@ -34159,6 +34186,8 @@ SDValue X86TargetLowering::PerformDAGCom
> case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget);
> case X86ISD::VSEXT:
> case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
> + case X86ISD::PINSRB:
> + case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
> case X86ISD::SHUFP: // Handle all target specific shuffles
> case X86ISD::INSERTPS:
> case X86ISD::PALIGNR:
>
> Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=293627&r1=293626&r2=293627&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Tue Jan 31 07:51:10 2017
> @@ -94,7 +94,8 @@ define <8 x i16> @_clearupper8xi16a(<8 x
> ;
> ; AVX-LABEL: _clearupper8xi16a:
> ; AVX: # BB#0:
> -; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
> +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
> +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
> ; AVX-NEXT: retq
> %x0 = extractelement <8 x i16> %0, i32 0
> %x1 = extractelement <8 x i16> %0, i32 1
> @@ -317,11 +318,7 @@ define <2 x i64> @_clearupper2xi64b(<2 x
> define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
> ; SSE-LABEL: _clearupper4xi32b:
> ; SSE: # BB#0:
> -; SSE-NEXT: xorl %eax, %eax
> -; SSE-NEXT: pinsrw $1, %eax, %xmm0
> -; SSE-NEXT: pinsrw $3, %eax, %xmm0
> -; SSE-NEXT: pinsrw $5, %eax, %xmm0
> -; SSE-NEXT: pinsrw $7, %eax, %xmm0
> +; SSE-NEXT: andps {{.*}}(%rip), %xmm0
> ; SSE-NEXT: retq
> ;
> ; AVX-LABEL: _clearupper4xi32b:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
--
Davide
"There are no solved problems; there are only problems that are more
or less solved" -- Henri Poincare
More information about the llvm-commits
mailing list