[llvm] r304659 - [X86][SSE] Add SCALAR_TO_VECTOR(PEXTRW/PEXTRB) support to faux shuffle combining
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 3 04:12:58 PDT 2017
Author: rksimon
Date: Sat Jun 3 06:12:57 2017
New Revision: 304659
URL: http://llvm.org/viewvc/llvm-project?rev=304659&view=rev
Log:
[X86][SSE] Add SCALAR_TO_VECTOR(PEXTRW/PEXTRB) support to faux shuffle combining
Generalized the existing SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT) code to also support the AssertZext + PEXTRW/PEXTRB cases.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=304659&r1=304658&r2=304659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jun 3 06:12:57 2017
@@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N
return true;
}
case ISD::SCALAR_TO_VECTOR: {
- // Match against a scalar_to_vector of an extract from a similar vector.
+ // Match against a scalar_to_vector of an extract from a vector,
+ // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
SDValue N0 = N.getOperand(0);
- if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- N0.getOperand(0).getValueType() != VT ||
- !isa<ConstantSDNode>(N0.getOperand(1)) ||
- NumElts <= N0.getConstantOperandVal(1) ||
- !N->isOnlyUserOf(N0.getNode()))
+ SDValue SrcExtract;
+
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getValueType() == VT) {
+ SrcExtract = N0;
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
+ }
+
+ if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
+ NumElts <= SrcExtract.getConstantOperandVal(1))
return false;
- Ops.push_back(N0.getOperand(0));
- Mask.push_back(N0.getConstantOperandVal(1));
- Mask.append(NumElts - 1, SM_SentinelUndef);
+
+ SDValue SrcVec = SrcExtract.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+
+ Ops.push_back(SrcVec);
+ Mask.push_back(SrcExtract.getConstantOperandVal(1));
+ Mask.append(NumZeros, SM_SentinelZero);
+ Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
}
case X86ISD::PINSRB:
Modified: llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll?rev=304659&r1=304658&r2=304659&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear_upper_vector_element_bits.ll Sat Jun 3 06:12:57 2017
@@ -405,12 +405,7 @@ define <16 x i8> @_clearupper16xi8a(<16
;
; AVX-LABEL: _clearupper16xi8a:
; AVX: # BB#0:
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
-; AVX-NEXT: vpextrb $1, %xmm0, %ecx
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x0 = extractelement <16 x i8> %0, i32 0
%x1 = extractelement <16 x i8> %0, i32 1
@@ -575,39 +570,10 @@ define <32 x i8> @_clearupper32xi8a(<32
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: _clearupper32xi8a:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
-; AVX1-NEXT: vpextrb $1, %xmm0, %ecx
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrb $0, %xmm1, %edx
-; AVX1-NEXT: vpextrb $1, %xmm1, %esi
-; AVX1-NEXT: vmovd %edx, %xmm2
-; AVX1-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper32xi8a:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: vpextrb $1, %xmm0, %ecx
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrb $0, %xmm1, %edx
-; AVX2-NEXT: vpextrb $1, %xmm1, %esi
-; AVX2-NEXT: vmovd %edx, %xmm2
-; AVX2-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper32xi8a:
+; AVX: # BB#0:
+; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%x0 = extractelement <32 x i8> %0, i32 0
%x1 = extractelement <32 x i8> %0, i32 1
%x2 = extractelement <32 x i8> %0, i32 2
More information about the llvm-commits mailing list