[llvm] r266535 - [X86][AVX] Add shuffle combine tests for MOVDDUP/MOVSHDUP/MOVSLDUP

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 16 13:31:01 PDT 2016


Author: rksimon
Date: Sat Apr 16 15:30:59 2016
New Revision: 266535

URL: http://llvm.org/viewvc/llvm-project?rev=266535&view=rev
Log:
[X86][AVX] Add shuffle combine tests for MOVDDUP/MOVSHDUP/MOVSLDUP

128-, 256- and 512-bit implementations (some not yet supported by combineX86ShuffleChain)
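
For reference, the dup patterns exercised by these tests correspond to repeating element indices: MOVDDUP duplicates the low 64-bit lane (<0,1,0,1> on f32 elements), MOVSHDUP duplicates the odd f32 elements (<1,1,3,3>) and MOVSLDUP the even ones (<0,0,2,2>). A minimal sketch of the MOVSLDUP pattern written as a plain shufflevector rather than a vpermilvar intrinsic (illustrative only; this function is not part of the commit):

  define <4 x float> @movsldup_equivalent(<4 x float> %a) {
    ; duplicate the even elements; expected to lower to vmovsldup on AVX targets
    %r = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    ret <4 x float> %r
  }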

Modified:
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll?rev=266535&r1=266534&r2=266535&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll Sat Apr 16 15:30:59 2016
@@ -28,6 +28,51 @@ define <4 x float> @combine_vpermilvar_4
   ret <4 x float> %2
 }
 
+define <4 x float> @combine_vpermilvar_4f32_movddup(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_movddup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; ALL-NEXT:    retq
+  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
+  ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_movshdup(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_movshdup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; ALL-NEXT:    retq
+  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3>)
+  ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_movsldup(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_movsldup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; ALL-NEXT:    retq
+  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>)
+  ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_unpckh:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; ALL-NEXT:    retq
+  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
+  ret <4 x float> %1
+}
+
+define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f32_unpckl:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT:    retq
+  %1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+  ret <4 x float> %1
+}
+
 define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
 ; ALL-LABEL: combine_vpermilvar_8f32_identity:
 ; ALL:       # BB#0:
@@ -37,6 +82,33 @@ define <8 x float> @combine_vpermilvar_8
   ret <8 x float> %2
 }
 
+define <8 x float> @combine_vpermilvar_8f32_movddup(<8 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_8f32_movddup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    retq
+  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
+  ret <8 x float> %1
+}
+
+define <8 x float> @combine_vpermilvar_8f32_movshdup(<8 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_8f32_movshdup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; ALL-NEXT:    retq
+  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>)
+  ret <8 x float> %1
+}
+
+define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
+; ALL-LABEL: combine_vpermilvar_8f32_movsldup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; ALL-NEXT:    retq
+  %1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>)
+  ret <8 x float> %1
+}
+
 define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_2f64_identity:
 ; ALL:       # BB#0:
@@ -46,6 +118,16 @@ define <2 x double> @combine_vpermilvar_
   ret <2 x double> %2
 }
 
+define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
+; ALL-LABEL: combine_vpermilvar_2f64_movddup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
+; ALL-NEXT:    retq
+  %1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
+  ret <2 x double> %1
+}
+
 define <4 x double> @combine_vpermilvar_4f64_identity(<4 x double> %a0) {
 ; ALL-LABEL: combine_vpermilvar_4f64_identity:
 ; ALL:       # BB#0:
@@ -55,6 +137,15 @@ define <4 x double> @combine_vpermilvar_
   ret <4 x double> %2
 }
 
+define <4 x double> @combine_vpermilvar_4f64_movddup(<4 x double> %a0) {
+; ALL-LABEL: combine_vpermilvar_4f64_movddup:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT:    retq
+  %1 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
+  ret <4 x double> %1
+}
+
 define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
 ; ALL-LABEL: combine_vpermilvar_4f32_4stage:
 ; ALL:       # BB#0:

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=266535&r1=266534&r2=266535&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Sat Apr 16 15:30:59 2016
@@ -19,6 +19,16 @@ define <8 x double> @combine_vpermt2var_
   ret <8 x double> %res1
 }
 
+define <8 x double> @combine_vpermt2var_8f64_movddup(<8 x double> %x0, <8 x double> %x1) {
+; CHECK-LABEL: combine_vpermt2var_8f64_movddup:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,2,2,4,4,6,6]
+; CHECK-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> <i64 0, i64 0, i64 2, i64 2, i64 4, i64 4, i64 6, i64 6>, <8 x double> %x0, <8 x double> %x1, i8 -1)
+  ret <8 x double> %res0
+}
+
 define <8 x i64> @combine_vpermt2var_8i64_identity(<8 x i64> %x0, <8 x i64> %x1) {
 ; CHECK-LABEL: combine_vpermt2var_8i64_identity:
 ; CHECK:       # BB#0:
@@ -37,6 +47,26 @@ define <16 x float> @combine_vpermt2var_
   ret <16 x float> %res1
 }
 
+define <16 x float> @combine_vpermt2var_16f32_vmovshdup(<16 x float> %x0, <16 x float> %x1) {
+; CHECK-LABEL: combine_vpermt2var_16f32_vmovshdup:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>, <16 x float> %x0, <16 x float> %x1, i16 -1)
+  ret <16 x float> %res0
+}
+
+define <16 x float> @combine_vpermt2var_16f32_vmovsldup(<16 x float> %x0, <16 x float> %x1) {
+; CHECK-LABEL: combine_vpermt2var_16f32_vmovsldup:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res0 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>, <16 x float> %x0, <16 x float> %x1, i16 -1)
+  ret <16 x float> %res0
+}
+
 define <16 x i32> @combine_vpermt2var_16i32_identity(<16 x i32> %x0, <16 x i32> %x1) {
 ; CHECK-LABEL: combine_vpermt2var_16i32_identity:
 ; CHECK:       # BB#0:
