[llvm] r281834 - [X86][SSE] Improve target shuffle mask extraction
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 17 11:50:55 PDT 2016
Author: rksimon
Date: Sat Sep 17 13:50:54 2016
New Revision: 281834
URL: http://llvm.org/viewvc/llvm-project?rev=281834&view=rev
Log:
[X86][SSE] Improve target shuffle mask extraction
Add ability to extract vXi64 'vzext_movl' masks on 32-bit targets.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=281834&r1=281833&r2=281834&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Sep 17 13:50:54 2016
@@ -4763,6 +4763,7 @@ static bool getTargetShuffleMaskIndices(
MVT VT = MaskNode.getSimpleValueType();
assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
+ unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
// Split an APInt element into MaskEltSizeInBits sized pieces and
// insert into the shuffle mask.
@@ -4794,17 +4795,20 @@ static bool getTargetShuffleMaskIndices(
if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
-
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
- return false;
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
-
SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
- SplitElementToMask(CN->getAPIntValue());
- RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
- return true;
+ if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
+ RawMask.push_back(CN->getZExtValue());
+ RawMask.append(NumMaskElts - 1, 0);
+ return true;
+ }
+
+ if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
+ unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+ SplitElementToMask(CN->getAPIntValue());
+ RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
+ return true;
+ }
}
return false;
}
@@ -4815,7 +4819,7 @@ static bool getTargetShuffleMaskIndices(
// We can always decode if the buildvector is all zero constants,
// but can't use isBuildVectorAllZeros as it might contain UNDEFs.
if (all_of(MaskNode->ops(), X86::isZeroNode)) {
- RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
+ RawMask.append(NumMaskElts, 0);
return true;
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll?rev=281834&r1=281833&r2=281834&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll Sat Sep 17 13:50:54 2016
@@ -258,10 +258,6 @@ define <8 x float> @combine_vpermilvar_8
define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
; X32-LABEL: combine_vpermilvar_2f64_identity:
; X32: # BB#0:
-; X32-NEXT: movl $2, %eax
-; X32-NEXT: vmovd %eax, %xmm1
-; X32-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
-; X32-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermilvar_2f64_identity:
@@ -365,10 +361,7 @@ define <4 x float> @combine_vpermilvar_4
define <2 x double> @constant_fold_vpermilvar_pd() {
; X32-LABEL: constant_fold_vpermilvar_pd:
; X32: # BB#0:
-; X32-NEXT: movl $2, %eax
-; X32-NEXT: vmovd %eax, %xmm0
-; X32-NEXT: vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
-; X32-NEXT: vpermilpd %xmm0, %xmm1, %xmm0
+; X32-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; X32-NEXT: retl
;
; X64-LABEL: constant_fold_vpermilvar_pd:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=281834&r1=281833&r2=281834&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sat Sep 17 13:50:54 2016
@@ -15,10 +15,7 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<
define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: combine_vpermil2pd_identity:
; X32: # BB#0:
-; X32-NEXT: movl $2, %eax
-; X32-NEXT: vmovd %eax, %xmm2
-; X32-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
-; X32-NEXT: vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
+; X32-NEXT: vmovaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_vpermil2pd_identity:
More information about the llvm-commits mailing list