[llvm] r374459 - [X86] Add test cases for packus/ssat/usat v32i32->v32i8 truncation. NFC
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 14:46:44 PDT 2019
Author: ctopper
Date: Thu Oct 10 14:46:44 2019
New Revision: 374459
URL: http://llvm.org/viewvc/llvm-project?rev=374459&view=rev
Log:
[X86] Add test cases for packus/ssat/usat v32i32->v32i8 truncation. NFC
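
For context, all three new tests express saturation in IR as compare-and-select
clamps followed by a trunc, which the backend is expected to recognize. A
minimal sketch of the usat form at a smaller width (a hypothetical
v4i32 -> v4i8 variant, not one of the tests added in this commit):

define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) {
  ; umin(%a0, 255): keep lanes already below 255, replace the rest with 255
  %1 = icmp ult <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
  %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 255, i32 255, i32 255, i32 255>
  ; after the clamp every lane fits in 8 bits, so the truncation is lossless
  %3 = trunc <4 x i32> %2 to <4 x i8>
  ret <4 x i8> %3
}

The ssat tests clamp to [-128, 127] with two compare/select pairs, and the
packus tests clamp a signed input to [0, 255]; the AVX512 CHECK lines below
show those patterns lowering to vpmovsdb and vpmovusdb respectively.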
Modified:
llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll
Modified: llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll (original)
+++ llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll Thu Oct 10 14:46:44 2019
@@ -1097,3 +1097,22 @@ define <16 x i8> @trunc_packus_v16i32_v1
%f = trunc <16 x i32> %e to <16 x i8>
ret <16 x i8> %f
}
+
+define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) {
+; CHECK-LABEL: trunc_packus_v32i32_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
+; CHECK-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpmovusdb %zmm1, %xmm1
+; CHECK-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = icmp slt <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = icmp sgt <32 x i32> %2, zeroinitializer
+ %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ ret <32 x i8> %5
+}
+
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-packus.ll Thu Oct 10 14:46:44 2019
@@ -3087,3 +3087,57 @@ define <32 x i8> @trunc_packus_v32i16_v3
%5 = trunc <32 x i16> %4 to <32 x i8>
ret <32 x i8> %5
}
+
+define <32 x i8> @trunc_packus_v32i32_v32i8(<32 x i32> %a0) {
+; SSE-LABEL: trunc_packus_v32i32_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm3, %xmm2
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: packuswb %xmm2, %xmm0
+; SSE-NEXT: packssdw %xmm7, %xmm6
+; SSE-NEXT: packssdw %xmm5, %xmm4
+; SSE-NEXT: packuswb %xmm6, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc_packus_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_packus_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_packus_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpmaxsd %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT: vpmaxsd %zmm2, %zmm1, %zmm1
+; AVX512-NEXT: vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = icmp sgt <32 x i32> %2, zeroinitializer
+ %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> zeroinitializer
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ ret <32 x i8> %5
+}
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-ssat.ll Thu Oct 10 14:46:44 2019
@@ -3049,3 +3049,54 @@ define <32 x i8> @trunc_ssat_v32i16_v32i
%5 = trunc <32 x i16> %4 to <32 x i8>
ret <32 x i8> %5
}
+
+define <32 x i8> @trunc_ssat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE-LABEL: trunc_ssat_v32i32_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: packssdw %xmm3, %xmm2
+; SSE-NEXT: packssdw %xmm1, %xmm0
+; SSE-NEXT: packsswb %xmm2, %xmm0
+; SSE-NEXT: packssdw %xmm7, %xmm6
+; SSE-NEXT: packssdw %xmm5, %xmm4
+; SSE-NEXT: packsswb %xmm6, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpackssdw %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_ssat_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512-NEXT: vpmovsdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <32 x i32> %a0, <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127, i32 127>
+ %3 = icmp sgt <32 x i32> %2, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
+ %4 = select <32 x i1> %3, <32 x i32> %2, <32 x i32> <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128>
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ ret <32 x i8> %5
+}
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll?rev=374459&r1=374458&r2=374459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll Thu Oct 10 14:46:44 2019
@@ -2453,3 +2453,211 @@ define <32 x i8> @trunc_usat_v32i16_v32i
%3 = trunc <32 x i16> %2 to <32 x i8>
ret <32 x i8> %3
}
+
+define <32 x i8> @trunc_usat_v32i32_v32i8(<32 x i32> %a0) {
+; SSE2-LABEL: trunc_usat_v32i32_v32i8:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa %xmm1, %xmm8
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: pxor %xmm11, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSE2-NEXT: movdqa %xmm9, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm12
+; SSE2-NEXT: pand %xmm12, %xmm5
+; SSE2-NEXT: pandn %xmm10, %xmm12
+; SSE2-NEXT: por %xmm5, %xmm12
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm11, %xmm5
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm1
+; SSE2-NEXT: pand %xmm1, %xmm4
+; SSE2-NEXT: pandn %xmm10, %xmm1
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: packuswb %xmm12, %xmm1
+; SSE2-NEXT: movdqa %xmm7, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pand %xmm7, %xmm6
+; SSE2-NEXT: pandn %xmm10, %xmm7
+; SSE2-NEXT: por %xmm6, %xmm7
+; SSE2-NEXT: packuswb %xmm5, %xmm7
+; SSE2-NEXT: packuswb %xmm7, %xmm1
+; SSE2-NEXT: movdqa %xmm8, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm8
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm8, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pand %xmm6, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: packuswb %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm11, %xmm4
+; SSE2-NEXT: movdqa %xmm9, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm10, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm9
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm10, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: packuswb %xmm5, %xmm9
+; SSE2-NEXT: packuswb %xmm9, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc_usat_v32i32_v32i8:
+; SSSE3: # %bb.0:
+; SSSE3-NEXT: movdqa %xmm1, %xmm8
+; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [255,255,255,255]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT: movdqa %xmm5, %xmm1
+; SSSE3-NEXT: pxor %xmm11, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483903,2147483903,2147483903,2147483903]
+; SSSE3-NEXT: movdqa %xmm9, %xmm12
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm12
+; SSSE3-NEXT: pand %xmm12, %xmm5
+; SSSE3-NEXT: pandn %xmm10, %xmm12
+; SSSE3-NEXT: por %xmm5, %xmm12
+; SSSE3-NEXT: movdqa %xmm4, %xmm5
+; SSSE3-NEXT: pxor %xmm11, %xmm5
+; SSSE3-NEXT: movdqa %xmm9, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1
+; SSSE3-NEXT: pand %xmm1, %xmm4
+; SSSE3-NEXT: pandn %xmm10, %xmm1
+; SSSE3-NEXT: por %xmm4, %xmm1
+; SSSE3-NEXT: packuswb %xmm12, %xmm1
+; SSSE3-NEXT: movdqa %xmm7, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm7
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm7, %xmm5
+; SSSE3-NEXT: movdqa %xmm6, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm7
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm7
+; SSSE3-NEXT: pand %xmm7, %xmm6
+; SSSE3-NEXT: pandn %xmm10, %xmm7
+; SSSE3-NEXT: por %xmm6, %xmm7
+; SSSE3-NEXT: packuswb %xmm5, %xmm7
+; SSSE3-NEXT: packuswb %xmm7, %xmm1
+; SSSE3-NEXT: movdqa %xmm8, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm8
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm8, %xmm5
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm6
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
+; SSSE3-NEXT: pand %xmm6, %xmm0
+; SSSE3-NEXT: pandn %xmm10, %xmm6
+; SSSE3-NEXT: por %xmm6, %xmm0
+; SSSE3-NEXT: packuswb %xmm5, %xmm0
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pxor %xmm11, %xmm4
+; SSSE3-NEXT: movdqa %xmm9, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm5
+; SSSE3-NEXT: pand %xmm5, %xmm3
+; SSSE3-NEXT: pandn %xmm10, %xmm5
+; SSSE3-NEXT: por %xmm3, %xmm5
+; SSSE3-NEXT: pxor %xmm2, %xmm11
+; SSSE3-NEXT: pcmpgtd %xmm11, %xmm9
+; SSSE3-NEXT: pand %xmm9, %xmm2
+; SSSE3-NEXT: pandn %xmm10, %xmm9
+; SSSE3-NEXT: por %xmm2, %xmm9
+; SSSE3-NEXT: packuswb %xmm5, %xmm9
+; SSSE3-NEXT: packuswb %xmm9, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc_usat_v32i32_v32i8:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255]
+; SSE41-NEXT: pminud %xmm8, %xmm5
+; SSE41-NEXT: pminud %xmm8, %xmm4
+; SSE41-NEXT: packusdw %xmm5, %xmm4
+; SSE41-NEXT: pminud %xmm8, %xmm7
+; SSE41-NEXT: pminud %xmm8, %xmm6
+; SSE41-NEXT: packusdw %xmm7, %xmm6
+; SSE41-NEXT: packuswb %xmm6, %xmm4
+; SSE41-NEXT: pminud %xmm8, %xmm1
+; SSE41-NEXT: pminud %xmm8, %xmm0
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pminud %xmm8, %xmm3
+; SSE41-NEXT: pminud %xmm8, %xmm2
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: packuswb %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm4, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc_usat_v32i32_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255]
+; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT: vpminud %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vpminud %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpminud %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc_usat_v32i32_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpminud %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm4, %ymm3, %ymm1
+; AVX2-NEXT: vpminud %ymm4, %ymm2, %ymm2
+; AVX2-NEXT: vpackusdw %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,1,3]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc_usat_v32i32_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512-NEXT: vpmovusdb %zmm1, %xmm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <32 x i32> %a0, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %2 = select <32 x i1> %1, <32 x i32> %a0, <32 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %3 = trunc <32 x i32> %2 to <32 x i8>
+ ret <32 x i8> %3
+}