[llvm] r340127 - [X86] Add test cases to show missed opportunities to use 512-bit PSUBUS.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 18 11:50:59 PDT 2018
Author: ctopper
Date: Sat Aug 18 11:50:59 2018
New Revision: 340127
URL: http://llvm.org/viewvc/llvm-project?rev=340127&view=rev
Log:
[X86] Add test cases to show missed opportunities to use 512-bit PSUBUS.
Modified:
llvm/trunk/test/CodeGen/X86/psubus.ll
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=340127&r1=340126&r2=340127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sat Aug 18 11:50:59 2018
@@ -1125,6 +1125,129 @@ vector.ph:
ret <8 x i16> %res
}
+; Unsigned saturating subtract of a splatted i8 (%w) from a <64 x i8> vector,
+; spelled in IR as select(x u< splat(w), 0, x - splat(w)).
+; The SSE2/SSSE3/SSE41 and AVX1/AVX2 checks below expect this to be folded
+; into (v)psubusb on 128-bit or 256-bit pieces. The 512-bit checks instead
+; expect a vpcmpnltub compare feeding a zero-masked vpsubb; per the commit
+; log this is the missed 512-bit PSUBUS opportunity the test is meant to show.
+define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
+; SSE2-LABEL: test17:
+; SSE2: # %bb.0: # %vector.ph
+; SSE2-NEXT: movd %edi, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; SSE2-NEXT: psubusb %xmm4, %xmm0
+; SSE2-NEXT: psubusb %xmm4, %xmm1
+; SSE2-NEXT: psubusb %xmm4, %xmm2
+; SSE2-NEXT: psubusb %xmm4, %xmm3
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test17:
+; SSSE3: # %bb.0: # %vector.ph
+; SSSE3-NEXT: movd %edi, %xmm4
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pshufb %xmm5, %xmm4
+; SSSE3-NEXT: psubusb %xmm4, %xmm0
+; SSSE3-NEXT: psubusb %xmm4, %xmm1
+; SSSE3-NEXT: psubusb %xmm4, %xmm2
+; SSSE3-NEXT: psubusb %xmm4, %xmm3
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: test17:
+; SSE41: # %bb.0: # %vector.ph
+; SSE41-NEXT: movd %edi, %xmm4
+; SSE41-NEXT: pxor %xmm5, %xmm5
+; SSE41-NEXT: pshufb %xmm5, %xmm4
+; SSE41-NEXT: psubusb %xmm4, %xmm0
+; SSE41-NEXT: psubusb %xmm4, %xmm1
+; SSE41-NEXT: psubusb %xmm4, %xmm2
+; SSE41-NEXT: psubusb %xmm4, %xmm3
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test17:
+; AVX1: # %bb.0: # %vector.ph
+; AVX1-NEXT: vmovd %edi, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test17:
+; AVX2: # %bb.0: # %vector.ph
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vpsubusb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubusb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test17:
+; AVX512: # %bb.0: # %vector.ph
+; AVX512-NEXT: vpbroadcastb %edi, %zmm1
+; AVX512-NEXT: vpcmpnltub %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: retq
+vector.ph:
+ ; Splat %w into all 64 lanes (insert into lane 0, then shuffle with an
+ ; all-zero mask).
+ %0 = insertelement <64 x i8> undef, i8 %w, i32 0
+ %broadcast15 = shufflevector <64 x i8> %0, <64 x i8> undef, <64 x i32> zeroinitializer
+ ; Lanes where x u< w would wrap in the plain sub; the select forces those
+ ; lanes to zero and keeps x - w everywhere else.
+ %1 = icmp ult <64 x i8> %x, %broadcast15
+ %2 = sub <64 x i8> %x, %broadcast15
+ %res = select <64 x i1> %1, <64 x i8> zeroinitializer, <64 x i8> %2
+ ret <64 x i8> %res
+}
+
+; Unsigned saturating subtract of a splatted i16 (%w) from a <32 x i16>
+; vector, spelled in IR as select(x u< splat(w), 0, x - splat(w)).
+; The SSE and AVX1/AVX2 checks below expect this to be folded into
+; (v)psubusw on 128-bit or 256-bit pieces. The 512-bit checks instead expect
+; a vpcmpnltuw compare feeding a zero-masked vpsubw; per the commit log this
+; is the missed 512-bit PSUBUS opportunity the test is meant to show.
+define <32 x i16> @test18(<32 x i16> %x, i16 zeroext %w) nounwind {
+; SSE-LABEL: test18:
+; SSE: # %bb.0: # %vector.ph
+; SSE-NEXT: movd %edi, %xmm4
+; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,2,3,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; SSE-NEXT: psubusw %xmm4, %xmm0
+; SSE-NEXT: psubusw %xmm4, %xmm1
+; SSE-NEXT: psubusw %xmm4, %xmm2
+; SSE-NEXT: psubusw %xmm4, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test18:
+; AVX1: # %bb.0: # %vector.ph
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovd %edi, %xmm3
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,2,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
+; AVX1-NEXT: vpsubusw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubusw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsubusw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubusw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test18:
+; AVX2: # %bb.0: # %vector.ph
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
+; AVX2-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test18:
+; AVX512: # %bb.0: # %vector.ph
+; AVX512-NEXT: vpbroadcastw %edi, %zmm1
+; AVX512-NEXT: vpcmpnltuw %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: retq
+vector.ph:
+ ; Splat %w into all 32 lanes (insert into lane 0, then shuffle with an
+ ; all-zero mask).
+ %0 = insertelement <32 x i16> undef, i16 %w, i32 0
+ %broadcast15 = shufflevector <32 x i16> %0, <32 x i16> undef, <32 x i32> zeroinitializer
+ ; Lanes where x u< w would wrap in the plain sub; the select forces those
+ ; lanes to zero and keeps x - w everywhere else.
+ %1 = icmp ult <32 x i16> %x, %broadcast15
+ %2 = sub <32 x i16> %x, %broadcast15
+ %res = select <32 x i1> %1, <32 x i16> zeroinitializer, <32 x i16> %2
+ ret <32 x i16> %res
+}
+
define <8 x i16> @psubus_8i16_max(<8 x i16> %x, <8 x i16> %y) nounwind {
; SSE-LABEL: psubus_8i16_max:
; SSE: # %bb.0: # %vector.ph
More information about the llvm-commits
mailing list