r274484 - [AVX512] Modify what indices we emit for the zero vector we use for zero extension of the result of a v2i1 or v4i1 masked compare. This way we emit something that the backend easily interprets as a concatenation rather than a true shuffle. This delivers slightly better codegen with the current backend capabilities.

Mon Jul 4 00:09:46 PDT 2016

Author: ctopper
Date: Mon Jul  4 02:09:46 2016
New Revision: 274484

URL: http://llvm.org/viewvc/llvm-project?rev=274484&view=rev
Log:
[AVX512] Modify what indices we emit for the zero vector we use for zero extension of the result of a v2i1 or v4i1 masked compare. This way we emit something that the backend easily interprets as a concatenation rather than a true shuffle. This delivers slightly better codegen with the current backend capabilities.

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=274484&r1=274483&r2=274484&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jul  4 02:09:46 2016
@@ -6512,7 +6512,7 @@ static Value *EmitX86MaskedCompare(CodeG
     for (unsigned i = 0; i != NumElts; ++i)
       Indices[i] = i;
     for (unsigned i = NumElts; i != 8; ++i)
-      Indices[i] = NumElts;
+      Indices[i] = i % NumElts + NumElts;
     Cmp = CGF.Builder.CreateShuffleVector(
         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
   }

Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=274484&r1=274483&r2=274484&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Mon Jul  4 02:09:46 2016
@@ -8,7 +8,7 @@
 __mmask8 test_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
   // CHECK-LABEL: @test_mm_cmpeq_epu32_mask
   // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}}
-  // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return (__mmask8)_mm_cmpeq_epu32_mask(__a, __b);
 }
 
@@ -22,6 +22,7 @@ __mmask8 test_mm_mask_cmpeq_epu32_mask(_
 __mmask8 test_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
   // CHECK-LABEL: @test_mm_cmpeq_epu64_mask
   // CHECK: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
   return (__mmask8)_mm_cmpeq_epu64_mask(__a, __b);
 }