[llvm] r314629 - [X86][SSE] Improve shuffle combining of PACKSS instructions.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 1 10:54:55 PDT 2017
Author: rksimon
Date: Sun Oct 1 10:54:55 2017
New Revision: 314629
URL: http://llvm.org/viewvc/llvm-project?rev=314629&view=rev
Log:
[X86][SSE] Improve shuffle combining of PACKSS instructions.
Support unary packing and fix the faux shuffle mask for vectors larger than 128 bits.
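As a standalone illustration (not part of the patch), here is a minimal sketch of the per-lane mask construction this change introduces. The helper name buildPackssMask and its parameters are hypothetical; the arithmetic mirrors the new getFauxShuffleMask logic below.

    #include <cstdio>
    #include <vector>

    // Build the truncation-shuffle mask that models PACKSS, lane by lane.
    // sizeInBits/numElts describe the result vector; isUnary means both
    // inputs are the same node, so only one operand is pushed and the
    // second half of each lane also reads from operand 0.
    static std::vector<unsigned> buildPackssMask(unsigned sizeInBits,
                                                 unsigned numElts,
                                                 bool isUnary) {
      unsigned Offset = isUnary ? 0 : numElts;
      unsigned NumLanes = sizeInBits / 128;
      unsigned NumEltsPerLane = numElts / NumLanes;
      unsigned HalfEltsPerLane = NumEltsPerLane / 2;
      std::vector<unsigned> Mask;
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        // First half of the lane comes from operand 0's lane.
        for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
          Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane));
        // Second half comes from operand 1's lane (or operand 0 if unary).
        for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
          Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane) + Offset);
      }
      return Mask;
    }

    int main() {
      // 256-bit unary PACKSSDW (both operands equal): v8i32 -> v16i16.
      for (unsigned M : buildPackssMask(256, 16, true))
        printf("%u ", M); // 0 2 4 6 0 2 4 6 8 10 12 14 8 10 12 14
      printf("\n");
    }

The previous code pushed both operands unconditionally and emitted indices 0, 2, 4, ... across the whole vector, which only models the 128-bit case; PACKSS interleaves its operands per 128-bit lane, so wider vectors need the lane-relative indices shown above.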
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314629&r1=314628&r2=314629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Oct 1 10:54:55 2017
@@ -5932,16 +5932,34 @@ static bool getFauxShuffleMask(SDValue N
return true;
}
case X86ISD::PACKSS: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
+ N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
+ "Unexpected input value type");
+
// If we know input saturation won't happen we can treat this
// as a truncation shuffle.
- if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt ||
- DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt)
+ if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
+ DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
return false;
- Ops.push_back(N.getOperand(0));
- Ops.push_back(N.getOperand(1));
- for (unsigned i = 0; i != NumElts; ++i)
- Mask.push_back(i * 2);
+ bool IsUnary = (N0 == N1);
+ unsigned Offset = IsUnary ? 0 : NumElts;
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumEltsPerLane = NumElts / NumLanes;
+ unsigned HalfEltsPerLane = NumEltsPerLane / 2;
+
+ Ops.push_back(N0);
+ if (!IsUnary)
+ Ops.push_back(N1);
+
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
+ Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane));
+ for (unsigned Elt = 0; Elt != HalfEltsPerLane; ++Elt)
+ Mask.push_back((Elt * 2) + (Lane * NumEltsPerLane) + Offset);
+ }
return true;
}
case X86ISD::VSHLI:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=314629&r1=314628&r2=314629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Sun Oct 1 10:54:55 2017
@@ -808,15 +808,13 @@ define <16 x i16> @shuffle_combine_packs
; X32-LABEL: shuffle_combine_packssdw_pshufb:
; X32: # BB#0:
; X32-NEXT: vpsrad $31, %ymm0, %ymm0
-; X32-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,8,9,4,5,0,1,12,13,8,9,4,5,0,1,16,17,20,21,24,25,28,29,28,29,24,25,20,21,16,17]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packssdw_pshufb:
; X64: # BB#0:
; X64-NEXT: vpsrad $31, %ymm0, %ymm0
-; X64-NEXT: vpackssdw %ymm0, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,8,9,4,5,0,1,12,13,8,9,4,5,0,1,16,17,20,21,24,25,28,29,28,29,24,25,20,21,16,17]
; X64-NEXT: retq
%1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%2 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %1)
@@ -829,17 +827,13 @@ define <32 x i8> @shuffle_combine_packss
; X32-LABEL: shuffle_combine_packsswb_pshufb:
; X32: # BB#0:
; X32-NEXT: vpsraw $15, %ymm0, %ymm0
-; X32-NEXT: vpsraw $15, %ymm1, %ymm1
-; X32-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0,30,28,26,24,22,20,18,16,30,28,26,24,22,20,18,16]
; X32-NEXT: retl
;
; X64-LABEL: shuffle_combine_packsswb_pshufb:
; X64: # BB#0:
; X64-NEXT: vpsraw $15, %ymm0, %ymm0
-; X64-NEXT: vpsraw $15, %ymm1, %ymm1
-; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,12,10,8,6,4,2,0,14,12,10,8,6,4,2,0,30,28,26,24,22,20,18,16,30,28,26,24,22,20,18,16]
; X64-NEXT: retq
%1 = ashr <16 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%2 = ashr <16 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=314629&r1=314628&r2=314629&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sun Oct 1 10:54:55 2017
@@ -644,15 +644,13 @@ define <16 x i8> @shuffle_combine_packss
; SSE-LABEL: shuffle_combine_packssdw_pshufb:
; SSE: # BB#0:
; SSE-NEXT: psrad $31, %xmm0
-; SSE-NEXT: packssdw %xmm0, %xmm0
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_combine_packssdw_pshufb:
; AVX: # BB#0:
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
-; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[13,12,9,8,5,4,1,0,13,12,9,8,5,4,1,0]
; AVX-NEXT: retq
%1 = ashr <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
%2 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %1)