[llvm] r374459 - [X86] Add packus/ssat/usat v32i32->v32i8 test cases. NFC

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 10 14:46:44 PDT 2019


Author: ctopper
Date: Thu Oct 10 14:46:44 2019
New Revision: 374459

URL: http://llvm.org/viewvc/llvm-project?rev=374459&view=rev
Log:
[X86] Add packus/ssat/usat v32i32->v32i8 test cases. NFC
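
For reference, the three saturation patterns these tests exercise all reduce
to a clamp-then-truncate on each i32 lane. The following is a minimal scalar
C sketch of that semantics, matching the icmp/select/trunc IR in the tests
below; the function names are illustrative only and are not part of the
commit:

    #include <stdint.h>

    /* ssat: signed i32 clamped to [-128, 127], then truncated to i8
       (min with 127, then max with -128, as in the IR below). */
    static int8_t trunc_ssat_i32_i8(int32_t x) {
        if (x > 127)  x = 127;
        if (x < -128) x = -128;
        return (int8_t)x;
    }

    /* usat: unsigned i32 clamped to [0, 255], then truncated to u8
       (a single unsigned min with 255). */
    static uint8_t trunc_usat_i32_u8(uint32_t x) {
        return (uint8_t)(x < 255u ? x : 255u);
    }

    /* packus: signed i32 clamped to [0, 255], then truncated to u8
       (signed min with 255, then max with 0, so negatives become 0). */
    static uint8_t trunc_packus_i32_u8(int32_t x) {
        if (x > 255) x = 255;
        if (x < 0)   x = 0;
        return (uint8_t)x;
    }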

Modified:
    llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
    llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll

Modified: llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll (original)
+++ llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll Thu Oct 10 14:46:44 2019
@@ -1097,3 +1097,22 @@ define <16 x i8> @trunc_packus_v16i32_v1
   %f = trunc <16 x i32> %e to <16 x i8>
   ret <16 x i8> %f
 }
+
+define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) {
+; CHECK-LABEL: trunc_packus_v32i32_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; CHECK-NEXT:    vpmovusdb %zmm0, %xmm0
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vpmovusdb %zmm1, %xmm1
+; CHECK-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %1 = icmp slt <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+  %3 = icmp sgt <32 x i32> %2, zeroinitializer
+  %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer
+  %5 = trunc <32 x i32> %4 to <32 x i8>
+  ret <32 x i8> %5
+}
+

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll Thu Oct 10 14:46:44 2019
@@ -3087,3 +3087,57 @@ define <32 x i8> @trunc_packus_v32i16_v3
   %5 = trunc <32 x i16> %4 to <32 x i8>
   ret <32 x i8> %5
 }
+
+define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) {
+; SSE-LABEL: trunc_packus_v32i32_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packuswb %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm7, %xmm6
+; SSE-NEXT:    packssdw %xmm5, %xmm4
+; SSE-NEXT:    packuswb %xmm6, %xmm4
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: trunc_packus_v32i32_v32i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_packus_v32i32_v32i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: trunc_packus_v32i32_v32i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; AVX512-NEXT:    vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; AVX512-NEXT:    vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %1 = icmp slt <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+  %3 = icmp sgt <32 x i32> %2, zeroinitializer
+  %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer
+  %5 = trunc <32 x i32> %4 to <32 x i8>
+  ret <32 x i8> %5
+}

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll Thu Oct 10 14:46:44 2019
@@ -3049,3 +3049,54 @@ define <32 x i8> @trunc_ssat_v32i16_v32i
   %5 = trunc <32 x i16> %4 to <32 x i8>
   ret <32 x i8> %5
 }
+
+define <32 x i8> @trunc_ssat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE-LABEL: trunc_ssat_v32i32_v32i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm7, %xmm6
+; SSE-NEXT:    packssdw %xmm5, %xmm4
+; SSE-NEXT:    packsswb %xmm6, %xmm4
+; SSE-NEXT:    movdqa %xmm4, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpackssdw %ymm3, %ymm2, %ymm2
+; AVX2-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vpacksswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovsdb %zmm0, %xmm0
+; AVX512-NEXT:    vpmovsdb %zmm1, %xmm1
+; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+  %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
+  %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
+  %5 = trunc <32 x i32> %4 to <32 x i8>
+  ret <32 x i8> %5
+}

Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll Thu Oct 10 14:46:44 2019
@@ -2453,3 +2453,211 @@ define <32 x i8> @trunc_usat_v32i16_v32i
   %3 = trunc <32 x i16> %2 to <32 x i8>
   ret <32 x i8> %3
 }
+
+define <32 x i8> @trunc_usat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE2-LABEL: trunc_usat_v32i32_v32i8:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa %xmm1, %xmm8
+; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    movdqa %xmm5, %xmm1
+; SSE2-NEXT:    pxor %xmm11, %xmm1
+; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSE2-NEXT:    movdqa %xmm9, %xmm12
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm12
+; SSE2-NEXT:    pand %xmm12, %xmm5
+; SSE2-NEXT:    pandn %xmm10, %xmm12
+; SSE2-NEXT:    por %xmm5, %xmm12
+; SSE2-NEXT:    movdqa %xmm4, %xmm5
+; SSE2-NEXT:    pxor %xmm11, %xmm5
+; SSE2-NEXT:    movdqa %xmm9, %xmm1
+; SSE2-NEXT:    pcmpgtd %xmm5, %xmm1
+; SSE2-NEXT:    pand %xmm1, %xmm4
+; SSE2-NEXT:    pandn %xmm10, %xmm1
+; SSE2-NEXT:    por %xmm4, %xmm1
+; SSE2-NEXT:    packuswb %xmm12, %xmm1
+; SSE2-NEXT:    movdqa %xmm7, %xmm4
+; SSE2-NEXT:    pxor %xmm11, %xmm4
+; SSE2-NEXT:    movdqa %xmm9, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT:    pand %xmm5, %xmm7
+; SSE2-NEXT:    pandn %xmm10, %xmm5
+; SSE2-NEXT:    por %xmm7, %xmm5
+; SSE2-NEXT:    movdqa %xmm6, %xmm4
+; SSE2-NEXT:    pxor %xmm11, %xmm4
+; SSE2-NEXT:    movdqa %xmm9, %xmm7
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT:    pand %xmm7, %xmm6
+; SSE2-NEXT:    pandn %xmm10, %xmm7
+; SSE2-NEXT:    por %xmm6, %xmm7
+; SSE2-NEXT:    packuswb %xmm5, %xmm7
+; SSE2-NEXT:    packuswb %xmm7, %xmm1
+; SSE2-NEXT:    movdqa %xmm8, %xmm4
+; SSE2-NEXT:    pxor %xmm11, %xmm4
+; SSE2-NEXT:    movdqa %xmm9, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT:    pand %xmm5, %xmm8
+; SSE2-NEXT:    pandn %xmm10, %xmm5
+; SSE2-NEXT:    por %xmm8, %xmm5
+; SSE2-NEXT:    movdqa %xmm0, %xmm4
+; SSE2-NEXT:    pxor %xmm11, %xmm4
+; SSE2-NEXT:    movdqa %xmm9, %xmm6
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT:    pand %xmm6, %xmm0
+; SSE2-NEXT:    pandn %xmm10, %xmm6
+; SSE2-NEXT:    por %xmm6, %xmm0
+; SSE2-NEXT:    packuswb %xmm5, %xmm0
+; SSE2-NEXT:    movdqa %xmm3, %xmm4
+; SSE2-NEXT:    pxor %xmm11, %xmm4
+; SSE2-NEXT:    movdqa %xmm9, %xmm5
+; SSE2-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT:    pand %xmm5, %xmm3
+; SSE2-NEXT:    pandn %xmm10, %xmm5
+; SSE2-NEXT:    por %xmm3, %xmm5
+; SSE2-NEXT:    pxor %xmm2, %xmm11
+; SSE2-NEXT:    pcmpgtd %xmm11, %xmm9
+; SSE2-NEXT:    pand %xmm9, %xmm2
+; SSE2-NEXT:    pandn %xmm10, %xmm9
+; SSE2-NEXT:    por %xmm2, %xmm9
+; SSE2-NEXT:    packuswb %xmm5, %xmm9
+; SSE2-NEXT:    packuswb %xmm9, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: trunc_usat_v32i32_v32i8:
+; SSSE3:       # %bb.0:
+; SSSE3-NEXT:    movdqa %xmm1, %xmm8
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT:    movdqa %xmm5, %xmm1
+; SSSE3-NEXT:    pxor %xmm11, %xmm1
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSSE3-NEXT:    movdqa %xmm9, %xmm12
+; SSSE3-NEXT:    pcmpgtd %xmm1, %xmm12
+; SSSE3-NEXT:    pand %xmm12, %xmm5
+; SSSE3-NEXT:    pandn %xmm10, %xmm12
+; SSSE3-NEXT:    por %xmm5, %xmm12
+; SSSE3-NEXT:    movdqa %xmm4, %xmm5
+; SSSE3-NEXT:    pxor %xmm11, %xmm5
+; SSSE3-NEXT:    movdqa %xmm9, %xmm1
+; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm1
+; SSSE3-NEXT:    pand %xmm1, %xmm4
+; SSSE3-NEXT:    pandn %xmm10, %xmm1
+; SSSE3-NEXT:    por %xmm4, %xmm1
+; SSSE3-NEXT:    packuswb %xmm12, %xmm1
+; SSSE3-NEXT:    movdqa %xmm7, %xmm4
+; SSSE3-NEXT:    pxor %xmm11, %xmm4
+; SSSE3-NEXT:    movdqa %xmm9, %xmm5
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT:    pand %xmm5, %xmm7
+; SSSE3-NEXT:    pandn %xmm10, %xmm5
+; SSSE3-NEXT:    por %xmm7, %xmm5
+; SSSE3-NEXT:    movdqa %xmm6, %xmm4
+; SSSE3-NEXT:    pxor %xmm11, %xmm4
+; SSSE3-NEXT:    movdqa %xmm9, %xmm7
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm7
+; SSSE3-NEXT:    pand %xmm7, %xmm6
+; SSSE3-NEXT:    pandn %xmm10, %xmm7
+; SSSE3-NEXT:    por %xmm6, %xmm7
+; SSSE3-NEXT:    packuswb %xmm5, %xmm7
+; SSSE3-NEXT:    packuswb %xmm7, %xmm1
+; SSSE3-NEXT:    movdqa %xmm8, %xmm4
+; SSSE3-NEXT:    pxor %xmm11, %xmm4
+; SSSE3-NEXT:    movdqa %xmm9, %xmm5
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT:    pand %xmm5, %xmm8
+; SSSE3-NEXT:    pandn %xmm10, %xmm5
+; SSSE3-NEXT:    por %xmm8, %xmm5
+; SSSE3-NEXT:    movdqa %xmm0, %xmm4
+; SSSE3-NEXT:    pxor %xmm11, %xmm4
+; SSSE3-NEXT:    movdqa %xmm9, %xmm6
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm6
+; SSSE3-NEXT:    pand %xmm6, %xmm0
+; SSSE3-NEXT:    pandn %xmm10, %xmm6
+; SSSE3-NEXT:    por %xmm6, %xmm0
+; SSSE3-NEXT:    packuswb %xmm5, %xmm0
+; SSSE3-NEXT:    movdqa %xmm3, %xmm4
+; SSSE3-NEXT:    pxor %xmm11, %xmm4
+; SSSE3-NEXT:    movdqa %xmm9, %xmm5
+; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT:    pand %xmm5, %xmm3
+; SSSE3-NEXT:    pandn %xmm10, %xmm5
+; SSSE3-NEXT:    por %xmm3, %xmm5
+; SSSE3-NEXT:    pxor %xmm2, %xmm11
+; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm9
+; SSSE3-NEXT:    pand %xmm9, %xmm2
+; SSSE3-NEXT:    pandn %xmm10, %xmm9
+; SSSE3-NEXT:    por %xmm2, %xmm9
+; SSSE3-NEXT:    packuswb %xmm5, %xmm9
+; SSSE3-NEXT:    packuswb %xmm9, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: trunc_usat_v32i32_v32i8:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm8 = [255,255,255,255]
+; SSE41-NEXT:    pminud %xmm8, %xmm5
+; SSE41-NEXT:    pminud %xmm8, %xmm4
+; SSE41-NEXT:    packusdw %xmm5, %xmm4
+; SSE41-NEXT:    pminud %xmm8, %xmm7
+; SSE41-NEXT:    pminud %xmm8, %xmm6
+; SSE41-NEXT:    packusdw %xmm7, %xmm6
+; SSE41-NEXT:    packuswb %xmm6, %xmm4
+; SSE41-NEXT:    pminud %xmm8, %xmm1
+; SSE41-NEXT:    pminud %xmm8, %xmm0
+; SSE41-NEXT:    packusdw %xmm1, %xmm0
+; SSE41-NEXT:    pminud %xmm8, %xmm3
+; SSE41-NEXT:    pminud %xmm8, %xmm2
+; SSE41-NEXT:    packusdw %xmm3, %xmm2
+; SSE41-NEXT:    packuswb %xmm2, %xmm0
+; SSE41-NEXT:    movdqa %xmm4, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: trunc_usat_v32i32_v32i8:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,255,255,255]
+; AVX1-NEXT:    vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpminud %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpackusdw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpackusdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT:    vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT:    vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT:    vpminud %xmm5, %xmm3, %xmm3
+; AVX1-NEXT:    vpackusdw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: trunc_usat_v32i32_v32i8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpminud %ymm4, %ymm1, %ymm1
+; AVX2-NEXT:    vpminud %ymm4, %ymm0, %ymm0
+; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpminud %ymm4, %ymm3, %ymm1
+; AVX2-NEXT:    vpminud %ymm4, %ymm2, %ymm2
+; AVX2-NEXT:    vpackusdw %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: trunc_usat_v32i32_v32i8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT:    vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+  %1 = icmp ult <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+  %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+  %3 = trunc <32 x i32> %2 to <32 x i8>
+  ret <32 x i8> %3
+}

More information about the llvm-commits mailing list