[llvm] r339696 - [X86][SSE] Avoid duplicate shuffle input sources in combineX86ShufflesRecursively
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 14 10:22:37 PDT 2018
Author: rksimon
Date: Tue Aug 14 10:22:37 2018
New Revision: 339696
URL: http://llvm.org/viewvc/llvm-project?rev=339696&view=rev
Log:
[X86][SSE] Avoid duplicate shuffle input sources in combineX86ShufflesRecursively
rL339686 added the case where a faux shuffle might have repeated shuffle inputs coming from either side of the OR().
This patch improves the insertion of the inputs into the source ops list to account for this, as well as making it trivial to add support for shuffles with more than 2 inputs in the future.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=339696&r1=339695&r2=339696&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug 14 10:22:37 2018
@@ -30279,23 +30279,26 @@ static SDValue combineX86ShufflesRecursi
// Add the inputs to the Ops list, avoiding duplicates.
SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
- int InputIdx0 = -1, InputIdx1 = -1;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- SDValue BC = peekThroughBitcasts(Ops[i]);
- if (Input0 && BC == peekThroughBitcasts(Input0))
- InputIdx0 = i;
- if (Input1 && BC == peekThroughBitcasts(Input1))
- InputIdx1 = i;
- }
+ auto AddOp = [&Ops](SDValue Input, int InsertionPoint = -1) -> int {
+ if (!Input)
+ return -1;
+ // Attempt to find an existing match.
+ SDValue InputBC = peekThroughBitcasts(Input);
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (InputBC == peekThroughBitcasts(Ops[i]))
+ return i;
+ // Match failed - should we replace an existing Op?
+ if (InsertionPoint >= 0) {
+ Ops[InsertionPoint] = Input;
+ return InsertionPoint;
+ }
+ // Add to the end of the Ops list.
+ Ops.push_back(Input);
+ return Ops.size() - 1;
+ };
- if (Input0 && InputIdx0 < 0) {
- InputIdx0 = SrcOpIndex;
- Ops[SrcOpIndex] = Input0;
- }
- if (Input1 && InputIdx1 < 0) {
- InputIdx1 = Ops.size();
- Ops.push_back(Input1);
- }
+ int InputIdx0 = AddOp(Input0, SrcOpIndex);
+ int InputIdx1 = AddOp(Input1);
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=339696&r1=339695&r2=339696&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Tue Aug 14 10:22:37 2018
@@ -923,18 +923,12 @@ define <32 x i8> @combine_pshufb_pshufb_
define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_pshufb_or_pshufb:
; X32: # %bb.0:
-; X32-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
-; X32-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19]
-; X32-NEXT: vpor %ymm0, %ymm1, %ymm0
-; X32-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: combine_pshufb_pshufb_or_pshufb:
; X64: # %bb.0:
-; X64-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
-; X64-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19]
-; X64-NEXT: vpor %ymm0, %ymm1, %ymm0
-; X64-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1>)
%2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3>)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=339696&r1=339695&r2=339696&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Tue Aug 14 10:22:37 2018
@@ -707,20 +707,23 @@ define <16 x i8> @combine_pshufb_pshufb_
define <16 x i8> @combine_pshufb_pshufb_or_pshufb(<16 x i8> %a0) {
; SSE-LABEL: combine_pshufb_pshufb_or_pshufb:
; SSE: # %bb.0:
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[0,1,2,3],zero,zero,zero,zero
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_pshufb_pshufb_or_pshufb:
-; AVX: # %bb.0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3]
-; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: combine_pshufb_pshufb_or_pshufb:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pshufb_pshufb_or_pshufb:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: combine_pshufb_pshufb_or_pshufb:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX512F-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1>)
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 1, i8 2, i8 3>)
%3 = or <16 x i8> %1, %2
More information about the llvm-commits
mailing list