[llvm] r264509 - [X86][AVX512BW] AVX512BW can sign-extend v32i8 to v32i16 for simpler v32i8 multiplies.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 26 02:44:27 PDT 2016
Author: rksimon
Date: Sat Mar 26 04:44:27 2016
New Revision: 264509
URL: http://llvm.org/viewvc/llvm-project?rev=264509&view=rev
Log:
[X86][AVX512BW] AVX512BW can sign-extend v32i8 to v32i16 for simpler v32i8 multiplies.
Only pre-AVX512BW targets need to split v32i8 vectors.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/pmul.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=264509&r1=264508&r2=264509&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 26 04:44:27 2016
@@ -18910,8 +18910,9 @@ static SDValue LowerMUL(SDValue Op, cons
if (VT == MVT::v16i8 || VT == MVT::v32i8) {
if (Subtarget.hasInt256()) {
// For 256-bit vectors, split into 128-bit vectors to allow the
- // sign-extension to occur.
- if (VT == MVT::v32i8)
+ // sign-extension to occur. We don't need this on AVX512BW as we can
+ // safely sign-extend to v32i16.
+ if (VT == MVT::v32i8 && !Subtarget.hasBWI())
return Lower256IntArith(Op, DAG);
MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=264509&r1=264508&r2=264509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Sat Mar 26 04:44:27 2016
@@ -473,15 +473,11 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %
;
; AVX512BW-LABEL: mul_v32i8c:
; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm1
-; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2
-; AVX512BW-NEXT: vpmullw %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmullw %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vmovaps {{.*#+}} ymm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512BW-NEXT: retq
entry:
%A = mul <32 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
@@ -678,17 +674,10 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i
;
; AVX512BW-LABEL: mul_v32i8:
; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm2
-; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm3
-; AVX512BW-NEXT: vpmullw %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2
-; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1
-; AVX512BW-NEXT: vpmovsxbw %xmm1, %ymm1
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmovsxbw %ymm1, %zmm1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX512BW-NEXT: retq
entry:
%A = mul <32 x i8> %i, %j
More information about the llvm-commits mailing list.