[llvm] d0902a8 - [X86][AVX] createVariablePermute - correctly extend same-sized-vector indices (PR50356)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 18 12:30:57 PDT 2021


Author: Simon Pilgrim
Date: 2021-05-18T20:30:46+01:00
New Revision: d0902a8665b1e158fb78df7fc5008e3af862939c

URL: https://github.com/llvm/llvm-project/commit/d0902a8665b1e158fb78df7fc5008e3af862939c
DIFF: https://github.com/llvm/llvm-project/commit/d0902a8665b1e158fb78df7fc5008e3af862939c.diff

LOG: [X86][AVX] createVariablePermute - correctly extend same-sized-vector indices (PR50356)

D101838 incorrectly handled indices vectors that are the same size as the target indices type but have a higher element count: it just bitcast them to the target indices type instead of performing a ZERO_EXTEND_VECTOR_INREG.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/var-permute-128.ll
    llvm/test/CodeGen/X86/var-permute-256.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ec7a27b83c00..a555d43c3602 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9979,7 +9979,8 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
          "Illegal variable permute mask size");
   if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
     if (IndicesVec.getValueSizeInBits() == SizeInBits)
-      IndicesVec = DAG.getBitcast(IndicesVT, IndicesVec);
+      IndicesVec = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(IndicesVec),
+                               IndicesVT, IndicesVec);
     else
       IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
                                     NumElts * VT.getScalarSizeInBits());

diff  --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index 27a689558483..a28e16417a38 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -1162,6 +1162,7 @@ define void @indices_convert() {
 ; XOP:       # %bb.0: # %bb
 ; XOP-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
 ; XOP-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; XOP-NEXT:    vmovapd (%rax), %xmm1
 ; XOP-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
 ; XOP-NEXT:    vpermil2pd $0, %xmm0, %xmm1, %xmm1, %xmm0
@@ -1172,6 +1173,7 @@ define void @indices_convert() {
 ; AVX1:       # %bb.0: # %bb
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; AVX1-NEXT:    vmovapd (%rax), %xmm1
 ; AVX1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
@@ -1183,6 +1185,7 @@ define void @indices_convert() {
 ; AVX2-NEXT:    vpbroadcastq (%rax), %xmm0
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
 ; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; AVX2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovapd (%rax), %xmm1
 ; AVX2-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
@@ -1191,12 +1194,13 @@ define void @indices_convert() {
 ;
 ; AVX512-LABEL: indices_convert:
 ; AVX512:       # %bb.0: # %bb
-; AVX512-NEXT:    vmovaps (%rax), %ymm0
-; AVX512-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
-; AVX512-NEXT:    vandps %xmm2, %xmm1, %xmm1
-; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
-; AVX512-NEXT:    vmovups %xmm0, (%rax)
+; AVX512-NEXT:    vmovdqa (%rax), %ymm0
+; AVX512-NEXT:    vpbroadcastq (%rax), %xmm1
+; AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
+; AVX512-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512-NEXT:    vpermq %zmm0, %zmm1, %zmm0
+; AVX512-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
@@ -1204,6 +1208,7 @@ define void @indices_convert() {
 ; AVX512VL:       # %bb.0: # %bb
 ; AVX512VL-NEXT:    vpbroadcastq (%rax), %xmm0
 ; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; AVX512VL-NEXT:    vpermq (%rax), %ymm0, %ymm0
 ; AVX512VL-NEXT:    vmovdqu %xmm0, (%rax)
 ; AVX512VL-NEXT:    vzeroupper

diff  --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll
index 3759d711f580..9902a351b8bf 100644
--- a/llvm/test/CodeGen/X86/var-permute-256.ll
+++ b/llvm/test/CodeGen/X86/var-permute-256.ll
@@ -1142,6 +1142,7 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr
 ; XOP-LABEL: PR50356:
 ; XOP:       # %bb.0:
 ; XOP-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOP-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; XOP-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
 ; XOP-NEXT:    vpermil2pd $0, %xmm1, %xmm3, %xmm0, %xmm0
@@ -1157,6 +1158,7 @@ define <4 x i64> @PR50356(<4 x i64> %0, <4 x i32> %1, <4 x i64> %2) unnamed_addr
 ; AVX1-LABEL: PR50356:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; AVX1-NEXT:    vpaddq %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpermilpd %xmm1, %xmm3, %xmm3


        


More information about the llvm-commits mailing list