[llvm] 7dbfcfa - [DAG] combineInsertEltToShuffle - if EXTRACT_VECTOR_ELT fails to match an existing shuffle op, try to replace an undef op if there is one.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 9 07:09:50 PDT 2022


Author: Simon Pilgrim
Date: 2022-06-09T14:56:14+01:00
New Revision: 7dbfcfa735f28a3bd33b465c686a20c4974373ae

URL: https://github.com/llvm/llvm-project/commit/7dbfcfa735f28a3bd33b465c686a20c4974373ae
DIFF: https://github.com/llvm/llvm-project/commit/7dbfcfa735f28a3bd33b465c686a20c4974373ae.diff

LOG: [DAG] combineInsertEltToShuffle - if EXTRACT_VECTOR_ELT fails to match an existing shuffle op, try to replace an undef op if there is one.

This should fix a number of shuffle regressions in D127115 where the re-ordered combines mean we fail to fold an EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT sequence into a BUILD_VECTOR if we extract from more than one vector source.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2aeaddbcee382..33bf4f4bc31e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19233,6 +19233,14 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
       }
     }
 
+    // If we failed to find a match, see if we can replace an UNDEF shuffle
+    // operand.
+    if (ElementOffset == -1 && Y.isUndef() &&
+        InsertVal0.getValueType() == Y.getValueType()) {
+      ElementOffset = Mask.size();
+      Y = InsertVal0;
+    }
+
     if (ElementOffset != -1) {
       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
 

diff  --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
index e8d19d56f3c75..3aed6a35fa6c4 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
@@ -298,15 +298,13 @@ define void @load_i32_stride6_vf4(<24 x i32>* %in.vec, <4 x i32>* %out.vec0, <4
 ; AVX512-NEXT:    vmovdqa 64(%rdi), %xmm5
 ; AVX512-NEXT:    vpextrd $2, %xmm5, %eax
 ; AVX512-NEXT:    vpinsrd $3, %eax, %xmm4, %xmm8
-; AVX512-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[1,1,1,1]
-; AVX512-NEXT:    vpinsrd $1, %eax, %xmm6, %xmm6
 ; AVX512-NEXT:    vpextrd $1, %xmm3, %eax
+; AVX512-NEXT:    vpblendd {{.*#+}} xmm6 = xmm2[0,1],xmm0[2,3]
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,3,2,3]
 ; AVX512-NEXT:    vpinsrd $2, %eax, %xmm6, %xmm6
 ; AVX512-NEXT:    vpblendd {{.*#+}} xmm6 = xmm6[0,1,2],xmm5[3]
-; AVX512-NEXT:    vpbroadcastd 8(%rdi), %xmm7
-; AVX512-NEXT:    vmovd %xmm1, %eax
-; AVX512-NEXT:    vpinsrd $1, %eax, %xmm7, %xmm7
+; AVX512-NEXT:    vpblendd {{.*#+}} xmm7 = xmm1[0,1],xmm2[2,3]
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm7 = xmm7[2,0,2,3]
 ; AVX512-NEXT:    vpblendd {{.*#+}} xmm7 = xmm7[0,1],xmm3[2],xmm7[3]
 ; AVX512-NEXT:    vmovdqa 80(%rdi), %xmm4
 ; AVX512-NEXT:    vmovd %xmm4, %eax
@@ -323,10 +321,9 @@ define void @load_i32_stride6_vf4(<24 x i32>* %in.vec, <4 x i32>* %out.vec0, <4
 ; AVX512-NEXT:    vpinsrd $2, %eax, %xmm3, %xmm3
 ; AVX512-NEXT:    vpextrd $2, %xmm4, %eax
 ; AVX512-NEXT:    vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX512-NEXT:    vpextrd $3, %xmm1, %eax
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX512-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    vpextrd $1, %xmm5, %eax
+; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
 ; AVX512-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
 ; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[3]
 ; AVX512-NEXT:    vmovdqa %xmm8, (%rsi)


        


More information about the llvm-commits mailing list