[llvm] r218707 - [x86] Rework all of the 128-bit vector shuffle tests with my handy test
Chandler Carruth
chandlerc at gmail.com
Tue Sep 30 14:44:34 PDT 2014
Author: chandlerc
Date: Tue Sep 30 16:44:34 2014
New Revision: 218707
URL: http://llvm.org/viewvc/llvm-project?rev=218707&view=rev
Log:
[x86] Rework all of the 128-bit vector shuffle tests with my handy test
updating script so that they are more thorough and consistent.
Specific fixes here include:
- Actually test VEX-encoded AVX mnemonics.
- Actually use an SSE 4.1 run to test SSE 4.1 features!
- Correctly check instructions sequences from the start of the function.
- Elide the shuffle operands and comment designator in a consistent way.
- Test all of the architectures instead of just the ones I was motivated
to manually author.
I've gone back through and fixed up any egregious issues I spotted. Let
me know if I missed something you really dislike.
One downside to this is that we're now not as diligently using FileCheck
variables for registers. I would be much more concerned with this if we
had larger register usage, but there just aren't that interesting of
register choices here and most of the registers are constrained by the
ABI. Ultimately, I don't think this is likely to be the maintenance
burden for these tests and updating them again should be staright
forward.
Modified:
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll?rev=218707&r1=218706&r2=218707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v16.ll Tue Sep 30 16:44:34 2014
@@ -6,29 +6,29 @@ target datalayout = "e-m:e-i64:64-f80:12
target triple = "x86_64-unknown-unknown"
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
+; FIXME: SSE2 should look like the following:
; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; FIXME: # BB#0:
; FIXME-NEXT: punpcklbw %xmm0, %xmm0
; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; FIXME-NEXT: retq
-; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
;
-; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw %xmm0, %xmm0
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
@@ -38,217 +38,203 @@ define <16 x i8> @shuffle_v16i8_00_00_00
}
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw %xmm0, %xmm0
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
-; SSE2-NEXT: punpcklbw %xmm0, %xmm0
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,2,4,5,6,7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
-; ALL-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
+; ALL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
; ALL: # BB#0:
-; ALL-NEXT: punpcklbw %xmm0, %xmm0
-; ALL-NEXT: punpcklwd %xmm0, %xmm0
+; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
-; ALL-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
+; ALL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
; ALL: # BB#0:
-; ALL-NEXT: punpcklbw %xmm0, %xmm0
-; ALL-NEXT: punpckhwd %xmm0, %xmm0
+; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; ALL-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSE2: # BB#0:
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: punpcklbw %xmm0, %xmm0
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
-; ALL-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
+; ALL-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
; ALL: # BB#0:
-; ALL-NEXT: punpcklbw %xmm0, %xmm0
+; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
+; FIXME: SSE2 should be the following:
; FIXME-LABEL: @shuffle_v16i8_0101010101010101
; FIXME: # BB#0:
; FIXME-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
; FIXME-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
; FIXME-NEXT: retq
;
-; SSE2-LABEL: @shuffle_v16i8_0101010101010101
+; SSE2-LABEL: shuffle_v16i8_0101010101010101:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_0101010101010101
+; SSSE3-LABEL: shuffle_v16i8_0101010101010101:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_0101010101010101
+; SSE41-LABEL: shuffle_v16i8_0101010101010101:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
-; ALL-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
-; ALL: punpcklbw %xmm1, %xmm0
+; ALL-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
+; ALL: # BB#0:
+; ALL-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw %xmm1, %xmm1
-; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: punpcklbw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
-; SSSE3: # BB#0:
-; SSSE3-NEXT: punpcklbw %xmm1, %xmm1
-; SSSE3-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSSE3-NEXT: punpcklbw %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
-; SSE41: # BB#0:
-; SSE41-NEXT: punpcklbw %xmm1, %xmm1
-; SSE41-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE41-NEXT: punpcklbw %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
+; ALL-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; ALL: # BB#0:
+; ALL-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; ALL-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; ALL-NEXT: movdqa %xmm1, %xmm0
+; ALL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
+; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpckhbw %xmm1, %xmm2
-; SSE2-NEXT: pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
-; SSE2-NEXT: punpcklbw %xmm1, %xmm0
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
+; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
+; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
+; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw %xmm2, %xmm1
-; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; SSE2-NEXT: punpcklbw %xmm2, %xmm0
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
+; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
+; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
@@ -256,35 +242,35 @@ define <16 x i8> @shuffle_v16i8_03_02_01
}
define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
-; SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
+; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE2: # BB#0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: punpcklbw %xmm2, %xmm3
-; SSE2-NEXT: pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: punpckhbw %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
-; SSE2-NEXT: punpckhbw %xmm2, %xmm1
-; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; SSE2-NEXT: punpcklbw %xmm2, %xmm0
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: shufpd {{.*#+}} xmm4 = xmm4[0],xmm3[1]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: packuswb %xmm4, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
+; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
+; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
@@ -292,21 +278,21 @@ define <16 x i8> @shuffle_v16i8_03_02_01
}
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
-; SSE2-LABEL: @trunc_v4i32_shuffle
+; SSE2-LABEL: trunc_v4i32_shuffle:
; SSE2: # BB#0:
-; SSE2-NEXT: pand
+; SSE2-NEXT: pand .LCPI13_0(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @trunc_v4i32_shuffle
+; SSSE3-LABEL: trunc_v4i32_shuffle:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @trunc_v4i32_shuffle
+; SSE41-LABEL: trunc_v4i32_shuffle:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
@@ -315,9 +301,8 @@ define <16 x i8> @trunc_v4i32_shuffle(<1
define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
; We don't have anything useful to check here. This generates 100s of
; instructions. Instead, just make sure we survived codegen.
-;
-; ALL-LABEL: @stress_test0
-; ALL: retq
+; ALL-LABEL: stress_test0:
+; ALL: retq
entry:
%s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
%s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
@@ -333,11 +318,13 @@ entry:
}
define <16 x i8> @stress_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
-; We don't have anything useful to check here. This generates tons of
-; instructions. Instead, just make sure we survived codegen.
+; There is nothing interesting to check about these instructions other than
+; that they survive codegen. However, we actually do better and delete all of
+; them because the result is 'undef'.
;
-; ALL-LABEL: @stress_test1
-; ALL: retq
+; ALL-LABEL: stress_test1:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: retq
entry:
%s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
%s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
@@ -357,19 +344,31 @@ entry:
}
define <16 x i8> @PR20540(<8 x i8> %a) {
-; SSSE3-LABEL: @PR20540
+; SSE2-LABEL: PR20540:
+; SSE2: # BB#0:
+; SSE2-NEXT: pand .LCPI16_0(%rip), %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR20540:
; SSSE3: # BB#0:
; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @PR20540
+; SSE41-LABEL: PR20540:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm1
-; SSE41-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,0,0,0,0,0,0,0]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
@@ -377,104 +376,315 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz
+; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]]
-; SSE2-NEXT: movd %[[R]], %xmm0
+; SSE2-NEXT: movzbl %dil, %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz
+; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]]
-; SSE2-NEXT: movd %[[R]], %xmm0
+; SSE2-NEXT: movzbl %dil, %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq $5, %xmm0
; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,0],zero,xmm1[0,0,0,0,0,0,0,0,0,0]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
-; SSE2-LABEL: @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16
+; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]]
-; SSE2-NEXT: movd %[[R]], %xmm0
+; SSE2-NEXT: movzbl %dil, %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq $15, %xmm0
; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,u,u,3,u,u,6,7,8,9,10,11,12,13,14],zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[u,u],zero,xmm0[u,u],zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz
+; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl {{.*}}, %[[R:.*]]
-; SSE2-NEXT: movd %[[R]], %xmm0
+; SSE2-NEXT: movzbl %dil, %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: pslldq $2, %xmm0
; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %edi, %xmm0
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd %edi, %xmm0
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,xmm1[3,4,5,6,7,8,9,10,11,12,13,14,15]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14
+; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,1,2,0]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: packuswb %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $15, {{.*}} # xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14
+; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $15, {{.*}} # xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14
+; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,0]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $15, {{.*}} # xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14
+; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $15, {{.*}} # xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00
+; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,1,2,0]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm4[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,7]
+; SSE2-NEXT: pand .LCPI23_0(%rip), %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,4]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: packuswb %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $1, {{.*}} # xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00
+; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $1, {{.*}} # xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16
+; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[3,1,2,0]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm3[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm4[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
+; SSE2-NEXT: pand .LCPI24_0(%rip), %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,4]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: packuswb %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $1, {{.*}} # xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16
+; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $1, {{.*}} # xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
@@ -482,29 +692,96 @@ define <16 x i8> @shuffle_v16i8_01_02_03
}
define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00
+; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,0]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: packuswb %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $1, {{.*}} # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00
+; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $1, {{.*}} # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
ret <16 x i8> %shuffle
}
define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30
+; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[3,1,2,0]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,4,5,6]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: packuswb %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $15, {{.*}} # xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30
+; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $15, {{.*}} # xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
@@ -512,19 +789,19 @@ define <16 x i8> @shuffle_v16i8_15_16_17
}
define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
-; SSE2-LABEL: @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu
+; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu
+; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu
+; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -533,20 +810,20 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu
}
define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
-; SSE2-LABEL: @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz
+; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3],xmm0[4],[[X]][4],xmm0[5],[[X]][5],xmm0[6],[[X]][6],xmm0[7],[[X]][7]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3]
-; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz
+; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz
+; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -555,19 +832,19 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz
}
define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) {
-; SSE2-LABEL: @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu
+; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu
+; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; SSSE3: # BB#0:
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu
+; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -576,21 +853,21 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu
}
define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) {
-; SSE2-LABEL: @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz
+; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3],xmm0[4],[[X]][4],xmm0[5],[[X]][5],xmm0[6],[[X]][6],xmm0[7],[[X]][7]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz
+; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]]
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7]
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz
+; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -599,17 +876,17 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz
}
define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) {
-; SSE2-LABEL: @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu
+; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu
+; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; SSSE3: # BB#0:
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu
+; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -618,19 +895,19 @@ define <16 x i8> @shuffle_v16i8_00_uu_01
}
define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) {
-; SSE2-LABEL: @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz
+; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]]
-; SSE2-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz
+; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]]
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz
+; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -639,25 +916,64 @@ define <16 x i8> @shuffle_v16i8_00_zz_01
}
define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) {
-; SSSE3-LABEL: @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movdqa %xmm0, %[[X:xmm[0-9]+]]
-; SSSE3-NEXT: pshufb {{.*}} # [[X]] = [[X]][2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],[[X]][0],xmm1[1],[[X]][1],xmm1[2],[[X]][2],xmm1[3],[[X]][3],xmm1[4],[[X]][4],xmm1[5],[[X]][5],xmm1[6],[[X]][6],xmm1[7],[[X]][7]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT: packuswb %xmm0, %xmm4
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: packuswb %xmm0, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00
-; SSE41: # BB#0:
-; SSE41-NEXT: movdqa %xmm0, %[[X:xmm[0-9]+]]
-; SSE41-NEXT: pshufb {{.*}} # [[X]] = [[X]][2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: pshufb {{.*}} # xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],[[X]][0],xmm1[1],[[X]][1],xmm1[2],[[X]][2],xmm1[3],[[X]][3],xmm1[4],[[X]][4],xmm1[5],[[X]][5],xmm1[6],[[X]][6],xmm1[7],[[X]][7]
-; SSE41-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: punpcklbw {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
entry:
@@ -667,34 +983,197 @@ entry:
}
define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
-; SSSE3-LABEL: @stress_test2
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5]
-; SSSE3-NEXT: movdqa %xmm1, %[[X1:xmm[0-9]+]]
-; SSSE3-NEXT: pshufb {{.*}} # [[X1]] = [[X1]][13,14],zero,[[X1]][0,10,5],zero,[[X1]][10,10],zero,zero,zero,[[X1]][14,12],zero,zero
-; SSSE3-NEXT: por %xmm0, %[[X1]]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: movdqa %xmm1, %[[X2:xmm[0-9]+]]
-; SSSE3-NEXT: pshufb {{.*}} # [[X2]] = [[X2]][1,12,5,9,1,5],zero,zero,[[X2]][u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: por %xmm0, %[[X2]]
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pshufb {{.*}} # xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: por %xmm1, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm2 = xmm2[0],[[X2]][0],xmm2[1],[[X2]][1],xmm2[2],[[X2]][2],xmm2[3],[[X2]][3],xmm2[4],[[X2]][4],xmm2[5],[[X2]][5],xmm2[6],[[X2]][6],xmm2[7],[[X2]][7]
-; SSSE3-NEXT: movdqa %xmm2, %[[X3:xmm[0-9]+]]
-; SSSE3-NEXT: pshufb {{.*}} # [[X3]] = [[X3]][6],zero,[[X3]][14,14],zero,zero,[[X3]][11,0,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: movdqa %[[X1]], %xmm0
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: por %[[X3]], %xmm0
-; SSSE3-NEXT: pshufb {{.*}} # xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: movdqa %[[X1]], %[[X3]]
-; SSSE3-NEXT: pshufb {{.*}} # [[X3]] = [[X3]][3,u],zero,zero,[[X3]][u,u,u,u,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: por %xmm2, %[[X3]]
-; SSSE3-NEXT: pshufb {{.*}} # [[X1]] = [[X1]][1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: punpcklbw {{.*}} # [[X1]] = [[X1]][0],[[X3]][0],[[X1]][1],[[X3]][1],[[X1]][2],[[X3]][2],[[X1]][3],[[X3]][3],[[X1]][4],[[X3]][4],[[X1]][5],[[X3]][5],[[X1]][6],[[X3]][6],[[X1]][7],[[X3]][7]
-; SSSE3-NEXT: punpcklbw {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3],xmm0[4],[[X1]][4],xmm0[5],[[X1]][5],xmm0[6],[[X1]][6],xmm0[7],[[X1]][7]
-; SSSE3-NEXT: retq
+; SSE2-LABEL: stress_test2:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm8, %xmm8
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3],xmm4[4],xmm8[4],xmm4[5],xmm8[5],xmm4[6],xmm8[6],xmm4[7],xmm8[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm8[8],xmm0[9],xmm8[9],xmm0[10],xmm8[10],xmm0[11],xmm8[11],xmm0[12],xmm8[12],xmm0[13],xmm8[13],xmm0[14],xmm8[14],xmm0[15],xmm8[15]
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,0,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[1,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm8[8],xmm5[9],xmm8[9],xmm5[10],xmm8[10],xmm5[11],xmm8[11],xmm5[12],xmm8[12],xmm5[13],xmm8[13],xmm5[14],xmm8[14],xmm5[15],xmm8[15]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm5[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,6,4,6,7]
+; SSE2-NEXT: movdqa %xmm6, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm7[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm7[2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[1,0,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm3[0]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,1,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[3,1,2,3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3],xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,1]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,3,2,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[1,1,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,3,2,1]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm7[3,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: packuswb %xmm6, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,2,3,3,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,1,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,5,6]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,0]
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,4,7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm8[8],xmm2[9],xmm8[9],xmm2[10],xmm8[10],xmm2[11],xmm8[11],xmm2[12],xmm8[12],xmm2[13],xmm8[13],xmm2[14],xmm8[14],xmm2[15],xmm8[15]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,7,6,7]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,0],xmm2[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT: packuswb %xmm0, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3],xmm1[4],xmm8[4],xmm1[5],xmm8[5],xmm1[6],xmm8[6],xmm1[7],xmm8[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm8[8],xmm2[9],xmm8[9],xmm2[10],xmm8[10],xmm2[11],xmm8[11],xmm2[12],xmm8[12],xmm2[13],xmm8[13],xmm2[14],xmm8[14],xmm2[15],xmm8[15]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,0,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm4, %xmm3
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm8[8],xmm3[9],xmm8[9],xmm3[10],xmm8[10],xmm3[11],xmm8[11],xmm3[12],xmm8[12],xmm3[13],xmm8[13],xmm3[14],xmm8[14],xmm3[15],xmm8[15]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3],xmm4[4],xmm8[4],xmm4[5],xmm8[5],xmm4[6],xmm8[6],xmm4[7],xmm8[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[0,1,0,1]
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,1,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,6,7]
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm0[2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm4[3,1,2,3,4,5,6,7]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,3]
+; SSE2-NEXT: packuswb %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,3,2,1]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,6,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7]
+; SSE2-NEXT: packuswb %xmm0, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: stress_test2:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5]
+; SSSE3-NEXT: movdqa %xmm1, %xmm3
+; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[13,14],zero,xmm3[0,10,5],zero,xmm3[10,10],zero,zero,zero,xmm3[14,12],zero,zero
+; SSSE3-NEXT: por %xmm0, %xmm3
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movdqa %xmm1, %xmm4
+; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,12,5,9,1,5],zero,zero,xmm4[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm0, %xmm4
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSSE3-NEXT: movdqa %xmm2, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6],zero,xmm1[14,14],zero,zero,xmm1[11,0,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movdqa %xmm3, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movdqa %xmm3, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,u],zero,zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm2, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: stress_test2:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[2],zero,zero,zero,xmm0[11],zero,zero,xmm0[3,4,5],zero,zero,xmm0[15,5]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[13,14],zero,xmm3[0,10,5],zero,xmm3[10,10],zero,zero,zero,xmm3[14,12],zero,zero
+; SSE41-NEXT: por %xmm0, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[1,6,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movdqa %xmm1, %xmm4
+; SSE41-NEXT: pshufb {{.*#+}} xmm4 = xmm4[1,12,5,9,1,5],zero,zero,xmm4[u,u,u,u,u,u,u,u]
+; SSE41-NEXT: por %xmm0, %xmm4
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[2],zero,zero,xmm1[6,15,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[15,8,12],zero,xmm2[13,15],zero,zero,xmm2[u,u,u,u,u,u,u,u]
+; SSE41-NEXT: por %xmm1, %xmm2
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6],zero,xmm1[14,14],zero,zero,xmm1[11,0,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[12],zero,zero,xmm0[1,14],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm2 = zero,xmm2[u,2,3,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movdqa %xmm3, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[3,u],zero,zero,xmm1[u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm3 = xmm3[1,4,10,13,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE41-NEXT: retq
entry:
%s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
%s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll?rev=218707&r1=218706&r2=218707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v2.ll Tue Sep 30 16:44:34 2014
@@ -8,429 +8,613 @@ target datalayout = "e-m:e-i64:64-f80:12
target triple = "x86_64-unknown-unknown"
define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_00
-; ALL: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_00:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_00:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_10
-; ALL: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_10:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_10:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_11
-; ALL: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_11:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_11:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_22
-; ALL: pshufd {{.*}} # xmm0 = xmm1[0,1,0,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_22:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_22:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_32
-; ALL: pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_32:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_32:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_33
-; ALL: pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_33:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_33:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
ret <2 x i64> %shuffle
}
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: @shuffle_v2f64_00
-; SSE2: movlhps {{.*}} # xmm0 = xmm0[0,0]
+; SSE2-LABEL: shuffle_v2f64_00:
+; SSE2: # BB#0:
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2f64_00
-; SSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
+; SSE3-LABEL: shuffle_v2f64_00:
+; SSE3: # BB#0:
+; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2f64_00
-; SSSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
+; SSSE3-LABEL: shuffle_v2f64_00:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2f64_00
-; SSE41: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
+; SSE41-LABEL: shuffle_v2f64_00:
+; SSE41: # BB#0:
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_00:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: @shuffle_v2f64_10
-; SSE: shufpd {{.*}} # xmm0 = xmm0[1,0]
+; SSE-LABEL: shuffle_v2f64_10:
+; SSE: # BB#0:
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2f64_10
-; AVX: vpermilpd {{.*}} # xmm0 = xmm0[1,0]
+; AVX-LABEL: shuffle_v2f64_10:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
-; ALL-LABEL: @shuffle_v2f64_11
-; ALL: movhlps {{.*}} # xmm0 = xmm0[1,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2f64_11:
+; SSE: # BB#0:
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_11:
+; AVX: # BB#0:
+; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: @shuffle_v2f64_22
-; SSE2: movlhps {{.*}} # xmm1 = xmm1[0,0]
+; SSE2-LABEL: shuffle_v2f64_22:
+; SSE2: # BB#0:
+; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2f64_22
-; SSE3: unpcklpd {{.*}} # xmm1 = xmm1[0,0]
+; SSE3-LABEL: shuffle_v2f64_22:
+; SSE3: # BB#0:
+; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2f64_22
-; SSSE3: unpcklpd {{.*}} # xmm1 = xmm1[0,0]
+; SSSE3-LABEL: shuffle_v2f64_22:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2f64_22
-; SSE41: unpcklpd {{.*}} # xmm1 = xmm1[0,0]
+; SSE41-LABEL: shuffle_v2f64_22:
+; SSE41: # BB#0:
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_22:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: @shuffle_v2f64_32
-; SSE: pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
+; SSE-LABEL: shuffle_v2f64_32:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2f64_32
-; AVX: vpermilpd {{.*}} # xmm0 = xmm1[1,0]
+; AVX-LABEL: shuffle_v2f64_32:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
-; SSE-LABEL: @shuffle_v2f64_33
-; SSE: movhlps {{.*}} # xmm1 = xmm1[1,1]
+; SSE-LABEL: shuffle_v2f64_33:
+; SSE: # BB#0:
+; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2f64_33
-; AVX: vmovhlps {{.*}} # xmm0 = xmm1[1,1]
+; AVX-LABEL: shuffle_v2f64_33:
+; AVX: # BB#0:
+; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: @shuffle_v2f64_03
-; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE2-LABEL: shuffle_v2f64_03:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2f64_03
-; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE3-LABEL: shuffle_v2f64_03:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2f64_03
-; SSSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSSE3-LABEL: shuffle_v2f64_03:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2f64_03
-; SSE41: blendpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE41-LABEL: shuffle_v2f64_03:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_03:
+; AVX: # BB#0:
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
-; SSE2-LABEL: @shuffle_v2f64_21
-; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE2-LABEL: shuffle_v2f64_21:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2f64_21
-; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE3-LABEL: shuffle_v2f64_21:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2f64_21
-; SSSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSSE3-LABEL: shuffle_v2f64_21:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2f64_21
-; SSE41: blendpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE41-LABEL: shuffle_v2f64_21:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_21:
+; AVX: # BB#0:
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
ret <2 x double> %shuffle
}
define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_02
-; ALL: punpcklqdq {{.*}} # xmm0 = xmm0[0],xmm1[0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_02:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_02:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE-LABEL: @shuffle_v2i64_02_copy
-; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm2[0]
+; SSE-LABEL: shuffle_v2i64_02_copy:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2i64_02_copy
-; AVX: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm2[0]
+; AVX-LABEL: shuffle_v2i64_02_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_03
-; SSE2: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE2-LABEL: shuffle_v2i64_03:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_03
-; SSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSE3-LABEL: shuffle_v2i64_03:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_03
-; SSSE3: shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
+; SSSE3-LABEL: shuffle_v2i64_03:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_03
-; SSE41: pblendw {{.*}} # xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_03:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_03:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_03_copy
-; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; SSE2-LABEL: shuffle_v2i64_03_copy:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_03_copy
-; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; SSE3-LABEL: shuffle_v2i64_03_copy:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_03_copy
-; SSSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
+; SSSE3-LABEL: shuffle_v2i64_03_copy:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_03_copy
-; SSE41: pblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_03_copy:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_03_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_12
-; SSE2: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE2-LABEL: shuffle_v2i64_12:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_12
-; SSE3: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE3-LABEL: shuffle_v2i64_12:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_12
-; SSSE3: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-LABEL: shuffle_v2i64_12:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_12
-; SSE41: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_12:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_12:
+; AVX: # BB#0:
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_12_copy
-; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
+; SSE2-LABEL: shuffle_v2i64_12_copy:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_12_copy
-; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
+; SSE3-LABEL: shuffle_v2i64_12_copy:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_12_copy
-; SSSE3: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; SSSE3-LABEL: shuffle_v2i64_12_copy:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_12_copy
-; SSE41: palignr {{.*}} # xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_12_copy:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_12_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
-; ALL-LABEL: @shuffle_v2i64_13
-; ALL: punpckhqdq {{.*}} # xmm0 = xmm0[1],xmm1[1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v2i64_13:
+; SSE: # BB#0:
+; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_13:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE-LABEL: @shuffle_v2i64_13_copy
-; SSE: punpckhqdq {{.*}} # xmm1 = xmm1[1],xmm2[1]
+; SSE-LABEL: shuffle_v2i64_13_copy:
+; SSE: # BB#0:
+; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2i64_13_copy
-; AVX: punpckhqdq {{.*}} # xmm0 = xmm1[1],xmm2[1]
+; AVX-LABEL: shuffle_v2i64_13_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
-; SSE-LABEL: @shuffle_v2i64_20
-; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0]
+; SSE-LABEL: shuffle_v2i64_20:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2i64_20
-; AVX: vpunpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0]
+; AVX-LABEL: shuffle_v2i64_20:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE-LABEL: @shuffle_v2i64_20_copy
-; SSE: punpcklqdq {{.*}} # xmm2 = xmm2[0],xmm1[0]
+; SSE-LABEL: shuffle_v2i64_20_copy:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2i64_20_copy
-; AVX: vpunpcklqdq {{.*}} # xmm0 = xmm2[0],xmm1[0]
+; AVX-LABEL: shuffle_v2i64_20_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_21
-; SSE2: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE2-LABEL: shuffle_v2i64_21:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_21
-; SSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSE3-LABEL: shuffle_v2i64_21:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_21
-; SSSE3: shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
+; SSSE3-LABEL: shuffle_v2i64_21:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_21
-; SSE41: pblendw {{.*}} # xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_21:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_21:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_21_copy
-; SSE2: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; SSE2-LABEL: shuffle_v2i64_21_copy:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_21_copy
-; SSE3: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; SSE3-LABEL: shuffle_v2i64_21_copy:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_21_copy
-; SSSE3: shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
+; SSSE3-LABEL: shuffle_v2i64_21_copy:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1]
; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_21_copy
-; SSE41: pblendw {{.*}} # xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_21_copy:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_21_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_30
-; SSE2: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE2-LABEL: shuffle_v2i64_30:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_30
-; SSE3: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE3-LABEL: shuffle_v2i64_30:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_30
-; SSSE3: palignr {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSSE3-LABEL: shuffle_v2i64_30:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2i64_30:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_30:
+; AVX: # BB#0:
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: @shuffle_v2i64_30_copy
-; SSE2: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
+; SSE2-LABEL: shuffle_v2i64_30_copy:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v2i64_30_copy
-; SSE3: shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
+; SSE3-LABEL: shuffle_v2i64_30_copy:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v2i64_30_copy
-; SSSE3: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-LABEL: shuffle_v2i64_30_copy:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v2i64_30_copy
-; SSE41: palignr {{.*}} # xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-LABEL: shuffle_v2i64_30_copy:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2i64_30_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
-; SSE-LABEL: @shuffle_v2i64_31
-; SSE: punpckhqdq {{.*}} # xmm1 = xmm1[1],xmm0[1]
+; SSE-LABEL: shuffle_v2i64_31:
+; SSE: # BB#0:
+; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2i64_31
-; AVX: vpunpckhqdq {{.*}} # xmm0 = xmm1[1],xmm0[1]
+; AVX-LABEL: shuffle_v2i64_31:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
ret <2 x i64> %shuffle
}
define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; SSE-LABEL: @shuffle_v2i64_31_copy
-; SSE: punpckhqdq {{.*}} # xmm2 = xmm2[1],xmm1[1]
+; SSE-LABEL: shuffle_v2i64_31_copy:
+; SSE: # BB#0:
+; SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v2i64_31_copy
-; AVX: vpunpckhqdq {{.*}} # xmm0 = xmm2[1],xmm1[1]
+; AVX-LABEL: shuffle_v2i64_31_copy:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
ret <2 x i64> %shuffle
@@ -438,12 +622,14 @@ define <2 x i64> @shuffle_v2i64_31_copy(
define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
-; SSE-LABEL: @insert_reg_and_zero_v2i64
-; SSE: movd %rdi, %xmm0
+; SSE-LABEL: insert_reg_and_zero_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movd %rdi, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @insert_reg_and_zero_v2i64
-; AVX: vmovq %rdi, %xmm0
+; AVX-LABEL: insert_reg_and_zero_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq %rdi, %xmm0
; AVX-NEXT: retq
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -451,9 +637,15 @@ define <2 x i64> @insert_reg_and_zero_v2
}
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
-; ALL-LABEL: @insert_mem_and_zero_v2i64
-; ALL: movq (%rdi), %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: insert_mem_and_zero_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movq (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_and_zero_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq (%rdi), %xmm0
+; AVX-NEXT: retq
%a = load i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -461,18 +653,30 @@ define <2 x i64> @insert_mem_and_zero_v2
}
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
-; ALL-LABEL: @insert_reg_and_zero_v2f64
-; ALL: movq %xmm0, %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: insert_reg_and_zero_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movq %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_and_zero_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq %xmm0, %xmm0
+; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
}
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
-; ALL-LABEL: @insert_mem_and_zero_v2f64
-; ALL: movsd (%rdi), %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: insert_mem_and_zero_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movsd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_and_zero_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd (%rdi), %xmm0
+; AVX-NEXT: retq
%a = load double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -480,45 +684,62 @@ define <2 x double> @insert_mem_and_zero
}
define <2 x double> @insert_dup_reg_v2f64(double %a) {
-; SSE2-LABEL: @insert_dup_reg_v2f64
-; SSE2: movlhps {{.*}} # xmm0 = xmm0[0,0]
+; FIXME: We should match movddup for SSE3 and higher here.
+;
+; SSE2-LABEL: insert_dup_reg_v2f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
-; FIXME: This should match movddup as well!
-; SSE3-LABEL: @insert_dup_reg_v2f64
-; SSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
+; SSE3-LABEL: insert_dup_reg_v2f64:
+; SSE3: # BB#0:
+; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
-; FIXME: This should match movddup as well!
-; SSSE3-LABEL: @insert_dup_reg_v2f64
-; SSSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
+; SSSE3-LABEL: insert_dup_reg_v2f64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
-; FIXME: This should match movddup as well!
-; SSE41-LABEL: @insert_dup_reg_v2f64
-; SSE41: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
+; SSE41-LABEL: insert_dup_reg_v2f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_dup_reg_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %shuffle
}
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
-; SSE2-LABEL: @insert_dup_mem_v2f64
-; SSE2: movsd {{.*}}, %xmm0
-; SSE2-NEXT: movlhps {{.*}} # xmm0 = xmm0[0,0]
+; SSE2-LABEL: insert_dup_mem_v2f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd (%rdi), %xmm0
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @insert_dup_mem_v2f64
-; SSE3: movddup {{.*}}, %xmm0
+; SSE3-LABEL: insert_dup_mem_v2f64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movddup (%rdi), %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @insert_dup_mem_v2f64
-; SSSE3: movddup {{.*}}, %xmm0
+; SSSE3-LABEL: insert_dup_mem_v2f64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup (%rdi), %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @insert_dup_mem_v2f64
-; SSE41: movddup {{.*}}, %xmm0
+; SSE41-LABEL: insert_dup_mem_v2f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup (%rdi), %xmm0
; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_dup_mem_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup (%rdi), %xmm0
+; AVX-NEXT: retq
%a = load double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
@@ -526,15 +747,15 @@ define <2 x double> @insert_dup_mem_v2f6
}
define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
-; SSE-LABEL: @shuffle_mem_v2f64_10
+; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm0
-; SSE-NEXT: shufpd {{.*}} # xmm0 = xmm0[1,0]
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_mem_v2f64_10
+; AVX-LABEL: shuffle_mem_v2f64_10:
; AVX: # BB#0:
-; AVX-NEXT: vpermilpd {{.*}} # xmm0 = mem[1,0]
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq
%a = load <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=218707&r1=218706&r2=218707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Tue Sep 30 16:44:34 2014
@@ -8,789 +8,972 @@ target datalayout = "e-m:e-i64:64-f80:12
target triple = "x86_64-unknown-unknown"
define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0001
-; ALL: pshufd {{.*}} # xmm0 = xmm0[0,0,0,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0001:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0001:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0020
-; ALL: pshufd {{.*}} # xmm0 = xmm0[0,0,2,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0020:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0020:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0112
-; ALL: pshufd {{.*}} # xmm0 = xmm0[0,1,1,2]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0112:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0112:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0300
-; ALL: pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0300:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0300:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_1000
-; ALL: pshufd {{.*}} # xmm0 = xmm0[1,0,0,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_1000:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_1000:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_2200
-; ALL: pshufd {{.*}} # xmm0 = xmm0[2,2,0,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_2200:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_2200:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_3330
-; ALL: pshufd {{.*}} # xmm0 = xmm0[3,3,3,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_3330:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_3330:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_3210
-; ALL: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_3210:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_3210:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_2121
-; ALL: pshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_2121:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_2121:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
ret <4 x i32> %shuffle
}
define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_0001
+; SSE-LABEL: shuffle_v4f32_0001:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_0001
+; AVX-LABEL: shuffle_v4f32_0001:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[0,0,0,1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_0020
+; SSE-LABEL: shuffle_v4f32_0020:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0,2,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_0020
+; AVX-LABEL: shuffle_v4f32_0020:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[0,0,2,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_0300
+; SSE-LABEL: shuffle_v4f32_0300:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[0,3,0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_0300
+; AVX-LABEL: shuffle_v4f32_0300:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[0,3,0,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_1000
+; SSE-LABEL: shuffle_v4f32_1000:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[1,0,0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_1000
+; AVX-LABEL: shuffle_v4f32_1000:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_2200
+; SSE-LABEL: shuffle_v4f32_2200:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[2,2,0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_2200
+; AVX-LABEL: shuffle_v4f32_2200:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[2,2,0,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_3330
+; SSE-LABEL: shuffle_v4f32_3330:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[3,3,3,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_3330
+; AVX-LABEL: shuffle_v4f32_3330:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[3,3,3,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: @shuffle_v4f32_3210
+; SSE-LABEL: shuffle_v4f32_3210:
; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_3210
+; AVX-LABEL: shuffle_v4f32_3210:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
-; ALL-LABEL: @shuffle_v4f32_0011
-; ALL: unpcklps {{.*}} # xmm0 = xmm0[0,0,1,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4f32_0011:
+; SSE: # BB#0:
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_0011:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
-; ALL-LABEL: @shuffle_v4f32_2233
-; ALL: unpckhps {{.*}} # xmm0 = xmm0[2,2,3,3]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4f32_2233:
+; SSE: # BB#0:
+; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_2233:
+; AVX: # BB#0:
+; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: @shuffle_v4f32_0022
-; SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,2,2]
+; SSE2-LABEL: shuffle_v4f32_0022:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_0022
-; SSE3: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2]
+; SSE3-LABEL: shuffle_v4f32_0022:
+; SSE3: # BB#0:
+; SSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_0022
-; SSSE3: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2]
+; SSSE3-LABEL: shuffle_v4f32_0022:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_0022
-; SSE41: movsldup {{.*}} # xmm0 = xmm0[0,0,2,2]
+; SSE41-LABEL: shuffle_v4f32_0022:
+; SSE41: # BB#0:
+; SSE41-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_0022
-; AVX: vmovsldup {{.*}} # xmm0 = xmm0[0,0,2,2]
+; AVX-LABEL: shuffle_v4f32_0022:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
-; SSE2-LABEL: @shuffle_v4f32_1133
-; SSE2: shufps {{.*}} # xmm0 = xmm0[1,1,3,3]
+; SSE2-LABEL: shuffle_v4f32_1133:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_1133
-; SSE3: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3]
+; SSE3-LABEL: shuffle_v4f32_1133:
+; SSE3: # BB#0:
+; SSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_1133
-; SSSE3: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3]
+; SSSE3-LABEL: shuffle_v4f32_1133:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_1133
-; SSE41: movshdup {{.*}} # xmm0 = xmm0[1,1,3,3]
+; SSE41-LABEL: shuffle_v4f32_1133:
+; SSE41: # BB#0:
+; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_1133
-; AVX: vmovshdup {{.*}} # xmm0 = xmm0[1,1,3,3]
+; AVX-LABEL: shuffle_v4f32_1133:
+; AVX: # BB#0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
ret <4 x float> %shuffle
}
define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_0124
-; SSE2: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE2-LABEL: shuffle_v4i32_0124:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_0124
-; SSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE3-LABEL: shuffle_v4i32_0124:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_0124
-; SSSE3: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
+; SSSE3-LABEL: shuffle_v4i32_0124:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_0124
-; SSE41: insertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE41-LABEL: shuffle_v4i32_0124:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_0124
-; AVX: vinsertps {{.*}} # xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-LABEL: shuffle_v4i32_0124:
+; AVX: # BB#0:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0142
-; ALL: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,2]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0142:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0142:
+; AVX: # BB#0:
+; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: @shuffle_v4i32_0412
-; SSE: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
-; SSE-NEXT: shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2]
+; SSE-LABEL: shuffle_v4i32_0412:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_0412
-; AVX: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
-; AVX-NEXT: vshufps {{.*}} # xmm0 = xmm1[2,0],xmm0[1,2]
+; AVX-LABEL: shuffle_v4i32_0412:
+; AVX: # BB#0:
+; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[1,2]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: @shuffle_v4i32_4012
-; SSE: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
-; SSE-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE-LABEL: shuffle_v4i32_4012:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_4012
-; AVX: vshufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
-; AVX-NEXT: vshufps {{.*}} # xmm0 = xmm1[0,2],xmm0[1,2]
+; AVX-LABEL: shuffle_v4i32_4012:
+; AVX: # BB#0:
+; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,2]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0145
-; ALL: punpcklqdq {{.*}} # xmm0 = xmm0[0],xmm1[0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0145:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0145:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0451
-; ALL: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
-; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,3,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0451:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0451:
+; AVX: # BB#0:
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: @shuffle_v4i32_4501
-; SSE: punpcklqdq {{.*}} # xmm1 = xmm1[0],xmm0[0]
+; SSE-LABEL: shuffle_v4i32_4501:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_4501
-; AVX: punpcklqdq {{.*}} # xmm0 = xmm1[0],xmm0[0]
+; AVX-LABEL: shuffle_v4i32_4501:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_4015
-; ALL: shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
-; ALL-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0,1,3]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_4015:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_4015:
+; AVX: # BB#0:
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
ret <4 x i32> %shuffle
}
define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
-; SSE2-LABEL: @shuffle_v4f32_4zzz
-; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3]
+; SSE2-LABEL: shuffle_v4f32_4zzz:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_4zzz
-; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3]
+; SSE3-LABEL: shuffle_v4f32_4zzz:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_4zzz
-; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][1,0]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X]][2,3]
+; SSSE3-LABEL: shuffle_v4f32_4zzz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_4zzz
-; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE41-NEXT: blendps {{.*}} # [[X]] = xmm0[0],[[X]][1,2,3]
-; SSE41-NEXT: movaps %[[X]], %xmm0
+; SSE41-LABEL: shuffle_v4f32_4zzz:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_4zzz
-; AVX: vxorps %[[X:xmm[0-9]+]], %[[X]]
-; AVX-NEXT: vblendps {{.*}} # xmm0 = xmm0[0],[[X]][1,2,3]
+; AVX-LABEL: shuffle_v4f32_4zzz:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
-; SSE2-LABEL: @shuffle_v4f32_z4zz
-; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0]
+; SSE2-LABEL: shuffle_v4f32_z4zz:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_z4zz
-; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0]
+; SSE3-LABEL: shuffle_v4f32_z4zz:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_z4zz
-; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][2,0]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][3,0]
+; SSSE3-LABEL: shuffle_v4f32_z4zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_z4zz
-; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero
+; SSE41-LABEL: shuffle_v4f32_z4zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_z4zz
-; AVX: vinsertps {{.*}} # xmm0 = zero,xmm0[0],zero,zero
+; AVX-LABEL: shuffle_v4f32_z4zz:
+; AVX: # BB#0:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
-; SSE2-LABEL: @shuffle_v4f32_zz4z
-; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0]
-; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2]
-; SSE2-NEXT: movaps %[[X]], %xmm0
+; SSE2-LABEL: shuffle_v4f32_zz4z:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_zz4z
-; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0]
-; SSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2]
-; SSE3-NEXT: movaps %[[X]], %xmm0
+; SSE3-LABEL: shuffle_v4f32_zz4z:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_zz4z
-; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],[[X]][0,0]
-; SSSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,0],xmm0[0,2]
-; SSSE3-NEXT: movaps %[[X]], %xmm0
+; SSSE3-LABEL: shuffle_v4f32_zz4z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_zz4z
-; SSE41: insertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero
+; SSE41-LABEL: shuffle_v4f32_zz4z:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_zz4z
-; AVX: vinsertps {{.*}} # xmm0 = zero,zero,xmm0[0],zero
+; AVX-LABEL: shuffle_v4f32_zz4z:
+; AVX: # BB#0:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
-; SSE2-LABEL: @shuffle_v4f32_zuu4
-; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
-; SSE2-NEXT: movaps %[[X]], %xmm0
+; SSE2-LABEL: shuffle_v4f32_zuu4:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_zuu4
-; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
-; SSE3-NEXT: movaps %[[X]], %xmm0
+; SSE3-LABEL: shuffle_v4f32_zuu4:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_zuu4
-; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
-; SSSE3-NEXT: movaps %[[X]], %xmm0
+; SSSE3-LABEL: shuffle_v4f32_zuu4:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_zuu4
-; SSE41: insertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0]
+; SSE41-LABEL: shuffle_v4f32_zuu4:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_zuu4
-; AVX: vinsertps {{.*}} # xmm0 = zero,zero,zero,xmm0[0]
+; AVX-LABEL: shuffle_v4f32_zuu4:
+; AVX: # BB#0:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
-; SSE2-LABEL: @shuffle_v4f32_zzz7
-; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0]
-; SSE2-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
-; SSE2-NEXT: movaps %[[X]], %xmm0
+; SSE2-LABEL: shuffle_v4f32_zzz7:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_zzz7
-; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0]
-; SSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
-; SSE3-NEXT: movaps %[[X]], %xmm0
+; SSE3-LABEL: shuffle_v4f32_zzz7:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_zzz7
-; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],[[X]][2,0]
-; SSSE3-NEXT: shufps {{.*}} # [[X]] = [[X]][0,1],xmm0[2,0]
-; SSSE3-NEXT: movaps %[[X]], %xmm0
+; SSSE3-LABEL: shuffle_v4f32_zzz7:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_zzz7
-; SSE41: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE41-NEXT: blendps {{.*}} # [[X]] = [[X]][0,1,2],xmm0[3]
-; SSE41-NEXT: movaps %[[X]], %xmm0
+; SSE41-LABEL: shuffle_v4f32_zzz7:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3]
+; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_zzz7
-; AVX: vxorps %[[X:xmm[0-9]+]], %[[X]]
-; AVX-NEXT: vblendps {{.*}} # xmm0 = [[X]][0,1,2],xmm0[3]
+; AVX-LABEL: shuffle_v4f32_zzz7:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
-; SSE2-LABEL: @shuffle_v4f32_z6zz
-; SSE2: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3]
+; SSE2-LABEL: shuffle_v4f32_z6zz:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4f32_z6zz
-; SSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3]
+; SSE3-LABEL: shuffle_v4f32_z6zz:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4f32_z6zz
-; SSSE3: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][0,0]
-; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[2,0],[[X]][2,3]
+; SSSE3-LABEL: shuffle_v4f32_z6zz:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4f32_z6zz
-; SSE41: insertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero
+; SSE41-LABEL: shuffle_v4f32_z6zz:
+; SSE41: # BB#0:
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4f32_z6zz
-; AVX: vinsertps {{.*}} # xmm0 = zero,xmm0[2],zero,zero
+; AVX-LABEL: shuffle_v4f32_z6zz:
+; AVX: # BB#0:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
ret <4 x float> %shuffle
}
define <4 x i32> @shuffle_v4i32_4zzz(i32 %i) {
-; ALL-LABEL: @shuffle_v4i32_4zzz
-; ALL: movd {{.*}}, %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_4zzz:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_4zzz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: retq
%a = insertelement <4 x i32> undef, i32 %i, i32 0
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_z4zz(i32 %i) {
-; ALL-LABEL: @shuffle_v4i32_z4zz
-; ALL: movd {{.*}}, %xmm0
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,1,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_z4zz:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_z4zz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT: retq
%a = insertelement <4 x i32> undef, i32 %i, i32 0
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_zz4z(i32 %i) {
-; ALL-LABEL: @shuffle_v4i32_zz4z
-; ALL: movd {{.*}}, %xmm0
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,1,0,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_zz4z:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_zz4z:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; AVX-NEXT: retq
%a = insertelement <4 x i32> undef, i32 %i, i32 0
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_zuu4(i32 %i) {
-; ALL-LABEL: @shuffle_v4i32_zuu4
-; ALL: movd {{.*}}, %xmm0
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,1,1,0]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_zuu4:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_zuu4:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
+; AVX-NEXT: retq
%a = insertelement <4 x i32> undef, i32 %i, i32 0
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_z6zz(i32 %i) {
-; ALL-LABEL: @shuffle_v4i32_z6zz
-; ALL: movd {{.*}}, %xmm0
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,1,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_z6zz:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_z6zz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT: retq
%a = insertelement <4 x i32> undef, i32 %i, i32 2
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_7012
+; SSE2-LABEL: shuffle_v4i32_7012:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0]
-; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_7012
+; SSE3-LABEL: shuffle_v4i32_7012:
; SSE3: # BB#0:
-; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[3,0],xmm0[0,0]
-; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_7012
+; SSSE3-LABEL: shuffle_v4i32_7012:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_7012
+; SSE41-LABEL: shuffle_v4i32_7012:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_7012
+; AVX-LABEL: shuffle_v4i32_7012:
; AVX: # BB#0:
-; AVX-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_6701
+; SSE2-LABEL: shuffle_v4i32_6701:
; SSE2: # BB#0:
-; SSE2-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_6701
+; SSE3-LABEL: shuffle_v4i32_6701:
; SSE3: # BB#0:
-; SSE3-NEXT: shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
+; SSE3-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_6701
+; SSSE3-LABEL: shuffle_v4i32_6701:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_6701
+; SSE41-LABEL: shuffle_v4i32_6701:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_6701
+; AVX-LABEL: shuffle_v4i32_6701:
; AVX: # BB#0:
-; AVX-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_5670
+; SSE2-LABEL: shuffle_v4i32_5670:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0]
-; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_5670
+; SSE3-LABEL: shuffle_v4i32_5670:
; SSE3: # BB#0:
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,0],xmm1[3,0]
-; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[1,2],xmm0[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_5670
+; SSSE3-LABEL: shuffle_v4i32_5670:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_5670
+; SSE41-LABEL: shuffle_v4i32_5670:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_5670
+; AVX-LABEL: shuffle_v4i32_5670:
; AVX: # BB#0:
-; AVX-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_1234
+; SSE2-LABEL: shuffle_v4i32_1234:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_1234
+; SSE3-LABEL: shuffle_v4i32_1234:
; SSE3: # BB#0:
-; SSE3-NEXT: shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[3,0]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[1,2],xmm1[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_1234
+; SSSE3-LABEL: shuffle_v4i32_1234:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_1234
+; SSE41-LABEL: shuffle_v4i32_1234:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $4, {{.*}} # xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_1234
+; AVX-LABEL: shuffle_v4i32_1234:
; AVX: # BB#0:
-; AVX-NEXT: vpalignr $4, {{.*}} # xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_2345
+; SSE2-LABEL: shuffle_v4i32_2345:
; SSE2: # BB#0:
-; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_2345
+; SSE3-LABEL: shuffle_v4i32_2345:
; SSE3: # BB#0:
-; SSE3-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_2345
+; SSSE3-LABEL: shuffle_v4i32_2345:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_2345
+; SSE41-LABEL: shuffle_v4i32_2345:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $8, {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_2345
+; AVX-LABEL: shuffle_v4i32_2345:
; AVX: # BB#0:
-; AVX-NEXT: vpalignr $8, {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: @shuffle_v4i32_3456
+; SSE2-LABEL: shuffle_v4i32_3456:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_3456
+; SSE3-LABEL: shuffle_v4i32_3456:
; SSE3: # BB#0:
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[3,0],xmm1[0,0]
-; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_3456
+; SSSE3-LABEL: shuffle_v4i32_3456:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_3456
+; SSE41-LABEL: shuffle_v4i32_3456:
; SSE41: # BB#0:
-; SSE41-NEXT: palignr $12, {{.*}} # xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_3456
+; AVX-LABEL: shuffle_v4i32_3456:
; AVX: # BB#0:
-; AVX-NEXT: vpalignr $12, {{.*}} # xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
-; ALL-LABEL: @shuffle_v4i32_0u1u
-; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
-; ALL-NEXT: retq
+; SSE-LABEL: shuffle_v4i32_0u1u:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_0u1u:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
-; SSE2-LABEL: @shuffle_v4i32_0z1z
+; SSE2-LABEL: shuffle_v4i32_0z1z:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSE3-LABEL: @shuffle_v4i32_0z1z
+; SSE3-LABEL: shuffle_v4i32_0z1z:
; SSE3: # BB#0:
-; SSE3-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSE3-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1]
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v4i32_0z1z
+; SSSE3-LABEL: shuffle_v4i32_0z1z:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v4i32_0z1z
+; SSE41-LABEL: shuffle_v4i32_0z1z:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: @shuffle_v4i32_0z1z
+; AVX-LABEL: shuffle_v4i32_0z1z:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
; AVX-NEXT: retq
@@ -799,20 +982,30 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4
}
define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
-; ALL-LABEL: @insert_reg_and_zero_v4i32
-; ALL: # BB#0:
-; ALL-NEXT: movd %edi, %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: insert_reg_and_zero_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movd %edi, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_reg_and_zero_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd %edi, %xmm0
+; AVX-NEXT: retq
%v = insertelement <4 x i32> undef, i32 %a, i32 0
%shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x i32> %shuffle
}
define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
-; ALL-LABEL: @insert_mem_and_zero_v4i32
-; ALL: # BB#0:
-; ALL-NEXT: movd (%rdi), %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: insert_mem_and_zero_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_and_zero_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovd (%rdi), %xmm0
+; AVX-NEXT: retq
%a = load i32* %ptr
%v = insertelement <4 x i32> undef, i32 %a, i32 0
%shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -820,17 +1013,17 @@ define <4 x i32> @insert_mem_and_zero_v4
}
define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
-; SSE-LABEL: @insert_reg_and_zero_v4f32
+; SSE-LABEL: insert_reg_and_zero_v4f32:
; SSE: # BB#0:
-; SSE-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
-; SSE-NEXT: movss %xmm0, %[[X]]
-; SSE-NEXT: movaps %[[X]], %xmm0
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: movss %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: @insert_reg_and_zero_v4f32
+; AVX-LABEL: insert_reg_and_zero_v4f32:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %[[X:xmm[0-9]+]], %[[X]], %[[X]]
-; AVX-NEXT: vmovss %xmm0, %[[X]], %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -838,10 +1031,15 @@ define <4 x float> @insert_reg_and_zero_
}
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
-; ALL-LABEL: @insert_mem_and_zero_v4f32
-; ALL: # BB#0:
-; ALL-NEXT: movss (%rdi), %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: insert_mem_and_zero_v4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_mem_and_zero_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovss (%rdi), %xmm0
+; AVX-NEXT: retq
%a = load float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -849,15 +1047,15 @@ define <4 x float> @insert_mem_and_zero_
}
define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
-; SSE-LABEL: @shuffle_mem_v4f32_3210
+; SSE-LABEL: shuffle_mem_v4f32_3210:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm0
-; SSE-NEXT: shufps {{.*}} # xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: @shuffle_mem_v4f32_3210
+; AVX-LABEL: shuffle_mem_v4f32_3210:
; AVX: # BB#0:
-; AVX-NEXT: vpermilps {{.*}} # xmm0 = mem[3,2,1,0]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; AVX-NEXT: retq
%a = load <4 x float>* %ptr
%shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=218707&r1=218706&r2=218707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Tue Sep 30 16:44:34 2014
@@ -1,593 +1,730 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_01012323
+; ALL-LABEL: shuffle_v8i16_01012323:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_67452301
+; ALL-LABEL: shuffle_v8i16_67452301:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_456789AB
+; SSE2-LABEL: shuffle_v8i16_456789AB:
; SSE2: # BB#0:
-; SSE2-NEXT: shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_456789AB
+; SSSE3-LABEL: shuffle_v8i16_456789AB:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr {{.*}} # xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_456789AB:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_00000000
+; SSE2-LABEL: shuffle_v8i16_00000000:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_00000000
+; SSSE3-LABEL: shuffle_v8i16_00000000:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_00000000:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_00004444
+; ALL-LABEL: shuffle_v8i16_00004444:
; ALL: # BB#0:
-; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; ALL-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; ALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_u0u1u2u3
+; ALL-LABEL: shuffle_v8i16_u0u1u2u3:
; ALL: # BB#0:
-; ALL-NEXT: unpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_u4u5u6u7
+; ALL-LABEL: shuffle_v8i16_u4u5u6u7:
; ALL: # BB#0:
-; ALL-NEXT: unpckhwd {{.*}} # xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; ALL-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_31206745
+; ALL-LABEL: shuffle_v8i16_31206745:
; ALL: # BB#0:
-; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
+; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_44440000
+; SSE2-LABEL: shuffle_v8i16_44440000:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,0,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_44440000
+; SSSE3-LABEL: shuffle_v8i16_44440000:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_44440000:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_23016745
+; ALL-LABEL: shuffle_v8i16_23016745:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,3,2]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_23026745
+; ALL-LABEL: shuffle_v8i16_23026745:
; ALL: # BB#0:
-; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,3,0,2,4,5,6,7]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
+; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_23016747
+; ALL-LABEL: shuffle_v8i16_23016747:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[1,0,2,3]
-; ALL-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; ALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_75643120
+; SSE2-LABEL: shuffle_v8i16_75643120:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_75643120
+; SSSE3-LABEL: shuffle_v8i16_75643120:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_75643120:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_10545410
+; SSE2-LABEL: shuffle_v8i16_10545410:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_10545410
+; SSSE3-LABEL: shuffle_v8i16_10545410:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_10545410:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_54105410
+; SSE2-LABEL: shuffle_v8i16_54105410:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_54105410
+; SSSE3-LABEL: shuffle_v8i16_54105410:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_54105410:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_54101054
+; SSE2-LABEL: shuffle_v8i16_54101054:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_54101054
+; SSSE3-LABEL: shuffle_v8i16_54101054:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_54101054:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_04400440
+; SSE2-LABEL: shuffle_v8i16_04400440:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,4,4,6]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_04400440
+; SSSE3-LABEL: shuffle_v8i16_04400440:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_04400440:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_40044004
+; SSE2-LABEL: shuffle_v8i16_40044004:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,0,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_40044004
+; SSSE3-LABEL: shuffle_v8i16_40044004:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_40044004:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_26405173
+; SSE2-LABEL: shuffle_v8i16_26405173:
; SSE2: # BB#0:
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,3,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_26405173
+; SSSE3-LABEL: shuffle_v8i16_26405173:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_26405173:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_20645173
+; SSE2-LABEL: shuffle_v8i16_20645173:
; SSE2: # BB#0:
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,0,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_20645173
+; SSSE3-LABEL: shuffle_v8i16_20645173:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_20645173:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_26401375
+; SSE2-LABEL: shuffle_v8i16_26401375:
; SSE2: # BB#0:
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_26401375
+; SSSE3-LABEL: shuffle_v8i16_26401375:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_26401375:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_66751643
+; SSE2-LABEL: shuffle_v8i16_66751643:
; SSE2: # BB#0:
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,1,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,5,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,1,3,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_66751643
+; SSSE3-LABEL: shuffle_v8i16_66751643:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_66751643:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_60514754
+; SSE2-LABEL: shuffle_v8i16_60514754:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,6]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_60514754
+; SSSE3-LABEL: shuffle_v8i16_60514754:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_60514754:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_00444444
+; SSE2-LABEL: shuffle_v8i16_00444444:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_00444444
+; SSSE3-LABEL: shuffle_v8i16_00444444:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_00444444:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_44004444
+; SSE2-LABEL: shuffle_v8i16_44004444:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_44004444
+; SSSE3-LABEL: shuffle_v8i16_44004444:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_44004444:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_04404444
+; SSE2-LABEL: shuffle_v8i16_04404444:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_04404444
+; SSSE3-LABEL: shuffle_v8i16_04404444:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_04404444:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_04400000
+; SSE2-LABEL: shuffle_v8i16_04400000:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,0,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_04400000
+; SSSE3-LABEL: shuffle_v8i16_04400000:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_04400000:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_04404567
+; ALL-LABEL: shuffle_v8i16_04404567:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_0X444444
+; SSE2-LABEL: shuffle_v8i16_0X444444:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_0X444444
+; SSSE3-LABEL: shuffle_v8i16_0X444444:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,{{[0-9]+,[0-9]+}},8,9,8,9,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0X444444:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_44X04444
+; SSE2-LABEL: shuffle_v8i16_44X04444:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,2,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_44X04444
+; SSSE3-LABEL: shuffle_v8i16_44X04444:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,8,9,{{[0-9]+,[0-9]+}},0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_44X04444:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_X4404444
+; SSE2-LABEL: shuffle_v8i16_X4404444:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_X4404444
+; SSSE3-LABEL: shuffle_v8i16_X4404444:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+}},8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_X4404444:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_0127XXXX
+; SSE2-LABEL: shuffle_v8i16_0127XXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_0127XXXX
+; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,2,3,4,5,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0127XXXX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_XXXX4563
+; SSE2-LABEL: shuffle_v8i16_XXXX4563:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_XXXX4563
+; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},8,9,10,11,12,13,6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_XXXX4563:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_4563XXXX
+; SSE2-LABEL: shuffle_v8i16_4563XXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_4563XXXX
+; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_4563XXXX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_01274563
+; SSE2-LABEL: shuffle_v8i16_01274563:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_01274563
+; SSSE3-LABEL: shuffle_v8i16_01274563:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_01274563:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_45630127
+; SSE2-LABEL: shuffle_v8i16_45630127:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,0,3,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_45630127
+; SSSE3-LABEL: shuffle_v8i16_45630127:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_45630127:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_37102735
+; SSE2-LABEL: shuffle_v8i16_37102735:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,5,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,4,5,6]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_37102735
+; SSSE3-LABEL: shuffle_v8i16_37102735:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_37102735:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_08192a3b
+; ALL-LABEL: shuffle_v8i16_08192a3b:
; ALL: # BB#0:
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_0c1d2e3f
+; ALL-LABEL: shuffle_v8i16_0c1d2e3f:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_4c5d6e7f
+; ALL-LABEL: shuffle_v8i16_4c5d6e7f:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_48596a7b
+; ALL-LABEL: shuffle_v8i16_48596a7b:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_08196e7f
+; ALL-LABEL: shuffle_v8i16_08196e7f:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_0c1d6879
+; ALL-LABEL: shuffle_v8i16_0c1d6879:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,0,2,3]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
+; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_109832ba
+; ALL-LABEL: shuffle_v8i16_109832ba:
; ALL: # BB#0:
-; ALL-NEXT: punpcklwd %xmm1, %xmm0
-; ALL-NEXT: pshuflw {{.*}} # xmm1 = xmm0[2,0,3,1,4,5,6,7]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; ALL-NEXT: punpcklqdq %xmm0, %xmm1
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; ALL-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; ALL-NEXT: movdqa %xmm1, %xmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
@@ -595,235 +732,300 @@ define <8 x i16> @shuffle_v8i16_109832ba
}
define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_8091a2b3
+; ALL-LABEL: shuffle_v8i16_8091a2b3:
; ALL: # BB#0:
-; ALL-NEXT: punpcklwd %xmm0, %xmm1
+; ALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; ALL-NEXT: movdqa %xmm1, %xmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_c4d5e6f7
+; ALL-LABEL: shuffle_v8i16_c4d5e6f7:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
-; ALL-NEXT: punpcklwd %xmm2, %xmm0
+; ALL-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; ALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_0213cedf
+; ALL-LABEL: shuffle_v8i16_0213cedf:
; ALL: # BB#0:
-; ALL-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; ALL-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; ALL-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; ALL-NEXT: punpcklqdq %xmm1, %xmm0
+; ALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; ALL-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; ALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; ALL-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_443aXXXX
+; SSE2-LABEL: shuffle_v8i16_443aXXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,3,4,5,6,7]
-; SSE2-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: @shuffle_v8i16_443aXXXX
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; SSSE3-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,0,2,3,4,5,6,7]
-; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,12,13,10,11,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_443aXXXX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_032dXXXX
+; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_032dXXXX
+; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,12,13,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_032dXXXX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
-; ALL-LABEL: @shuffle_v8i16_XXXdXXXX
+; ALL-LABEL: shuffle_v8i16_XXXdXXXX:
; ALL: # BB#0:
-; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm1[2,2,3,3]
+; ALL-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_012dXXXX
+; SSE2-LABEL: shuffle_v8i16_012dXXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpcklwd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_012dXXXX
+; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: punpcklwd %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_012dXXXX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_XXXXcde3
+; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; SSE2-NEXT: punpckhwd %xmm0, %xmm1
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_XXXXcde3
+; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; SSSE3-NEXT: punpckhwd %xmm0, %xmm1 # xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},0,1,4,5,8,9,14,15]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_cde3XXXX
+; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; SSE2-NEXT: punpckhwd %xmm0, %xmm1
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_cde3XXXX
+; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; SSSE3-NEXT: punpckhwd %xmm0, %xmm1 # xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[0,1,4,5,8,9,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_012dcde3
+; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,0,1]
-; SSE2-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpckhwd %xmm2, %xmm1
-; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: punpcklwd %xmm3, %xmm0
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
-; SSE2-NEXT: punpcklqdq %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: @shuffle_v8i16_012dcde3
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm2 = xmm0[0,1,0,1]
-; SSSE3-NEXT: pshufd {{.*}} # xmm3 = xmm1[2,3,0,1]
-; SSSE3-NEXT: punpckhwd %xmm2, %xmm1 # xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT: pshufb {{.*}} # xmm1 = xmm1[0,1,4,5,8,9,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
-; SSSE3-NEXT: punpcklwd %xmm3, %xmm0 # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,1,4,5,8,9,6,7,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
-; SSSE3-NEXT: punpcklqdq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_012dcde3:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_012dcde3:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_XXX1X579
+; SSE2-LABEL: shuffle_v8i16_XXX1X579:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,3,2,4,5,6,7]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,5,7]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: @shuffle_v8i16_XXX1X579
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+}},2,3,10,11,14,15,{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}}]
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+,[0-9]+}},4,5,{{[0-9]+,[0-9]+}},8,9,12,13,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_XXX1X579:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
+; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: @shuffle_v8i16_XX4X8acX
+; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,1,2,0,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
-; SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[0,1,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,4,7]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: @shuffle_v8i16_XX4X8acX
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # [[X:xmm[0-9]+]] = xmm0[2,3,0,1]
-; SSSE3-NEXT: pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3]
-; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[{{[0-9]+,[0-9]+,[0-9]+,[0-9]+}},2,3,{{[0-9]+,[0-9]+}},0,1,4,5,8,9,{{[0-9]+,[0-9]+}}]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,1,2,0,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
-; ALL-LABEL: @shuffle_v8i16_8zzzzzzz
+; ALL-LABEL: shuffle_v8i16_8zzzzzzz:
; ALL: # BB#0:
-; ALL-NEXT: movzwl {{.*}}, %[[R:.*]]
-; ALL-NEXT: movd %[[R]], %xmm0
+; ALL-NEXT: movzwl %di, %eax
+; ALL-NEXT: movd %eax, %xmm0
; ALL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -831,10 +1033,10 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz
}
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
-; ALL-LABEL: @shuffle_v8i16_z8zzzzzz
+; ALL-LABEL: shuffle_v8i16_z8zzzzzz:
; ALL: # BB#0:
-; ALL-NEXT: movzwl {{.*}}, %[[R:.*]]
-; ALL-NEXT: movd %[[R]], %xmm0
+; ALL-NEXT: movzwl %di, %eax
+; ALL-NEXT: movd %eax, %xmm0
; ALL-NEXT: pslldq $2, %xmm0
; ALL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
@@ -843,10 +1045,10 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz
}
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
-; ALL-LABEL: @shuffle_v8i16_zzzzz8zz
+; ALL-LABEL: shuffle_v8i16_zzzzz8zz:
; ALL: # BB#0:
-; ALL-NEXT: movzwl {{.*}}, %[[R:.*]]
-; ALL-NEXT: movd %[[R]], %xmm0
+; ALL-NEXT: movzwl %di, %eax
+; ALL-NEXT: movd %eax, %xmm0
; ALL-NEXT: pslldq $10, %xmm0
; ALL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
@@ -855,10 +1057,10 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz
}
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
-; ALL-LABEL: @shuffle_v8i16_zuuzuuz8
+; ALL-LABEL: shuffle_v8i16_zuuzuuz8:
; ALL: # BB#0:
-; ALL-NEXT: movzwl {{.*}}, %[[R:.*]]
-; ALL-NEXT: movd %[[R]], %xmm0
+; ALL-NEXT: movzwl %di, %eax
+; ALL-NEXT: movd %eax, %xmm0
; ALL-NEXT: pslldq $14, %xmm0
; ALL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
@@ -867,10 +1069,10 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8
}
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
-; ALL-LABEL: @shuffle_v8i16_zzBzzzzz
+; ALL-LABEL: shuffle_v8i16_zzBzzzzz:
; ALL: # BB#0:
-; ALL-NEXT: movzwl {{.*}}, %[[R:.*]]
-; ALL-NEXT: movd %[[R]], %xmm0
+; ALL-NEXT: movzwl %di, %eax
+; ALL-NEXT: movd %eax, %xmm0
; ALL-NEXT: pslldq $4, %xmm0
; ALL-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
@@ -879,149 +1081,368 @@ define <8 x i16> @shuffle_v8i16_zzBzzzzz
}
define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_def01234
+; SSE2-LABEL: shuffle_v8i16_def01234:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_def01234:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_def01234:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_ueuu123u
+; SSE2-LABEL: shuffle_v8i16_ueuu123u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,5,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_ueuu123u:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_56701234
+; SSE2-LABEL: shuffle_v8i16_56701234:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_56701234:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_56701234:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_u6uu123u
+; SSE2-LABEL: shuffle_v8i16_u6uu123u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,0,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_u6uu123u:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_uuuu123u
+; SSE2-LABEL: shuffle_v8i16_uuuu123u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_uuuu123u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_uuuu123u:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_bcdef012
+; SSE2-LABEL: shuffle_v8i16_bcdef012:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[3,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_bcdef012:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_bcdef012:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_ucdeuu1u
+; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_34567012
+; SSE2-LABEL: shuffle_v8i16_34567012:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_34567012:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_34567012:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_u456uu1u
+; SSE2-LABEL: shuffle_v8i16_u456uu1u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_u456uu1u:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_u456uuuu
+; SSE2-LABEL: shuffle_v8i16_u456uuuu:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u456uuuu:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_u456uuuu:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_3456789a
+; SSE2-LABEL: shuffle_v8i16_3456789a:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_3456789a:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_3456789a:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_u456uu9u
+; SSE2-LABEL: shuffle_v8i16_u456uu9u:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $6, {{.*}} # xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_u456uu9u:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_56789abc
+; SSE2-LABEL: shuffle_v8i16_56789abc:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,3,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,2,3,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_56789abc:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_56789abc:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
-; SSSE3-LABEL: @shuffle_v8i16_u6uu9abu
+; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,6,5,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,5,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
; SSSE3: # BB#0:
-; SSSE3-NEXT: palignr $10, {{.*}} # xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
+; SSE41: # BB#0:
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
-; SSE2-LABEL: @shuffle_v8i16_0uuu1uuu
+; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_0uuu1uuu
+; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; SSSE3-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v8i16_0uuu1uuu
+; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -1030,21 +1451,21 @@ define <8 x i16> @shuffle_v8i16_0uuu1uuu
}
define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
-; SSE2-LABEL: @shuffle_v8i16_0zzz1zzz
+; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3]
-; SSE2-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_0zzz1zzz
+; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pxor %[[X1:xmm[0-9]+]], %[[X1]]
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1],xmm0[2],[[X1]][2],xmm0[3],[[X1]][3]
-; SSSE3-NEXT: punpckldq {{.*}} # xmm0 = xmm0[0],[[X1]][0],xmm0[1],[[X1]][1]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v8i16_0zzz1zzz
+; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -1053,17 +1474,17 @@ define <8 x i16> @shuffle_v8i16_0zzz1zzz
}
define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
-; SSE2-LABEL: @shuffle_v8i16_0u1u2u3u
+; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
; SSE2: # BB#0:
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_0u1u2u3u
+; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v8i16_0u1u2u3u
+; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
; SSE41-NEXT: retq
@@ -1072,19 +1493,19 @@ define <8 x i16> @shuffle_v8i16_0u1u2u3u
}
define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
-; SSE2-LABEL: @shuffle_v8i16_0z1z2z3z
+; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSE2-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: @shuffle_v8i16_0z1z2z3z
+; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pxor %[[X:xmm[0-9]+]], %[[X]]
-; SSSE3-NEXT: punpcklwd {{.*}} # xmm0 = xmm0[0],[[X]][0],xmm0[1],[[X]][1],xmm0[2],[[X]][2],xmm0[3],[[X]][3]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: @shuffle_v8i16_0z1z2z3z
+; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
; SSE41: # BB#0:
; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
; SSE41-NEXT: retq
More information about the llvm-commits
mailing list