[llvm] r248536 - [X86][SSE2] Fix zero/any extension shuffles that don't start from the first element
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 24 14:02:18 PDT 2015
Author: rksimon
Date: Thu Sep 24 16:02:17 2015
New Revision: 248536
URL: http://llvm.org/viewvc/llvm-project?rev=248536&view=rev
Log:
[X86][SSE2] Fix zero/any extension shuffles that don't start from the first element
Fix for D12561 - we weren't correctly ensuring that the base element for extension was moved to start on a boundary suitable for UNPCKL/H
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=248536&r1=248535&r2=248536&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Sep 24 16:02:17 2015
@@ -7485,13 +7485,15 @@ static SDValue lowerVectorShuffleAsSpeci
MVT::v16i8, PSHUFBMask)));
}
- // If we are extending from an (odd)offset, shuffle them by 1 element.
- if (Offset & 1) {
+ // If we are extending from an offset, ensure we start on a boundary that
+ // we can unpack from.
+ int AlignToUnpack = Offset % (NumElements / Scale);
+ if (AlignToUnpack) {
SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
- for (int i = 1; i < NumElements; ++i)
- ShMask[i - 1] = i;
+ for (int i = AlignToUnpack; i < NumElements; ++i)
+ ShMask[i - AlignToUnpack] = i;
InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
- Offset--;
+ Offset -= AlignToUnpack;
}
// Otherwise emit a sequence of unpacks.
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=248536&r1=248535&r2=248536&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Thu Sep 24 16:02:17 2015
@@ -1283,7 +1283,7 @@ define <8 x i32> @shuf_zext_8i16_to_8i32
; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -1292,7 +1292,7 @@ define <8 x i32> @shuf_zext_8i16_to_8i32
; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
More information about the llvm-commits
mailing list