r351390 - [X86] Add versions of the avx512 gather intrinsics that take the mask as a vXi1 vector instead of a scalar

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 16 14:34:33 PST 2019


Author: ctopper
Date: Wed Jan 16 14:34:33 2019
New Revision: 351390

URL: http://llvm.org/viewvc/llvm-project?rev=351390&view=rev
Log:
[X86] Add versions of the avx512 gather intrinsics that take the mask as a vXi1 vector instead of a scalar

We need to custom handle these so we can turn the scalar mask into a vXi1 vector.

Differential Revision: https://reviews.llvm.org/D56530

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/avx512f-builtins.c
    cfe/trunk/test/CodeGen/avx512vl-builtins.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=351390&r1=351389&r2=351390&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Jan 16 14:34:33 2019
@@ -10073,6 +10073,114 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__builtin_ia32_compressstoreqi512_mask:
     return EmitX86CompressStore(*this, Ops);
 
+  case X86::BI__builtin_ia32_gather3div2df:
+  case X86::BI__builtin_ia32_gather3div2di:
+  case X86::BI__builtin_ia32_gather3div4df:
+  case X86::BI__builtin_ia32_gather3div4di:
+  case X86::BI__builtin_ia32_gather3div4sf:
+  case X86::BI__builtin_ia32_gather3div4si:
+  case X86::BI__builtin_ia32_gather3div8sf:
+  case X86::BI__builtin_ia32_gather3div8si:
+  case X86::BI__builtin_ia32_gather3siv2df:
+  case X86::BI__builtin_ia32_gather3siv2di:
+  case X86::BI__builtin_ia32_gather3siv4df:
+  case X86::BI__builtin_ia32_gather3siv4di:
+  case X86::BI__builtin_ia32_gather3siv4sf:
+  case X86::BI__builtin_ia32_gather3siv4si:
+  case X86::BI__builtin_ia32_gather3siv8sf:
+  case X86::BI__builtin_ia32_gather3siv8si:
+  case X86::BI__builtin_ia32_gathersiv8df:
+  case X86::BI__builtin_ia32_gathersiv16sf:
+  case X86::BI__builtin_ia32_gatherdiv8df:
+  case X86::BI__builtin_ia32_gatherdiv16sf:
+  case X86::BI__builtin_ia32_gathersiv8di:
+  case X86::BI__builtin_ia32_gathersiv16si:
+  case X86::BI__builtin_ia32_gatherdiv8di:
+  case X86::BI__builtin_ia32_gatherdiv16si: {
+    Intrinsic::ID IID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unexpected builtin");
+    case X86::BI__builtin_ia32_gather3div2df:
+      IID = Intrinsic::x86_avx512_mask_gather3div2_df;
+      break;
+    case X86::BI__builtin_ia32_gather3div2di:
+      IID = Intrinsic::x86_avx512_mask_gather3div2_di;
+      break;
+    case X86::BI__builtin_ia32_gather3div4df:
+      IID = Intrinsic::x86_avx512_mask_gather3div4_df;
+      break;
+    case X86::BI__builtin_ia32_gather3div4di:
+      IID = Intrinsic::x86_avx512_mask_gather3div4_di;
+      break;
+    case X86::BI__builtin_ia32_gather3div4sf:
+      IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
+      break;
+    case X86::BI__builtin_ia32_gather3div4si:
+      IID = Intrinsic::x86_avx512_mask_gather3div4_si;
+      break;
+    case X86::BI__builtin_ia32_gather3div8sf:
+      IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
+      break;
+    case X86::BI__builtin_ia32_gather3div8si:
+      IID = Intrinsic::x86_avx512_mask_gather3div8_si;
+      break;
+    case X86::BI__builtin_ia32_gather3siv2df:
+      IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
+      break;
+    case X86::BI__builtin_ia32_gather3siv2di:
+      IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
+      break;
+    case X86::BI__builtin_ia32_gather3siv4df:
+      IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
+      break;
+    case X86::BI__builtin_ia32_gather3siv4di:
+      IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
+      break;
+    case X86::BI__builtin_ia32_gather3siv4sf:
+      IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
+      break;
+    case X86::BI__builtin_ia32_gather3siv4si:
+      IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
+      break;
+    case X86::BI__builtin_ia32_gather3siv8sf:
+      IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
+      break;
+    case X86::BI__builtin_ia32_gather3siv8si:
+      IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
+      break;
+    case X86::BI__builtin_ia32_gathersiv8df:
+      IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
+      break;
+    case X86::BI__builtin_ia32_gathersiv16sf:
+      IID = Intrinsic::x86_avx512_mask_gather_dps_512;
+      break;
+    case X86::BI__builtin_ia32_gatherdiv8df:
+      IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
+      break;
+    case X86::BI__builtin_ia32_gatherdiv16sf:
+      IID = Intrinsic::x86_avx512_mask_gather_qps_512;
+      break;
+    case X86::BI__builtin_ia32_gathersiv8di:
+      IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
+      break;
+    case X86::BI__builtin_ia32_gathersiv16si:
+      IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
+      break;
+    case X86::BI__builtin_ia32_gatherdiv8di:
+      IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
+      break;
+    case X86::BI__builtin_ia32_gatherdiv16si:
+      IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
+      break;
+    }
+
+    unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(),
+                                Ops[2]->getType()->getVectorNumElements());
+    Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
+    Function *Intr = CGM.getIntrinsic(IID);
+    return Builder.CreateCall(Intr, Ops);
+  }
+
   case X86::BI__builtin_ia32_storehps:
   case X86::BI__builtin_ia32_storelps: {
     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);

Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=351390&r1=351389&r2=351390&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Wed Jan 16 14:34:33 2019
@@ -7002,97 +7002,97 @@ __m512 test_mm512_maskz_getexp_ps(__mmas
 
 __m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i64gather_ps
-  // CHECK: @llvm.x86.avx512.gather.qps.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qps.512
   return _mm512_i64gather_ps(__index, __addr, 2); 
 }
 
 __m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i64gather_ps
-  // CHECK: @llvm.x86.avx512.gather.qps.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qps.512
   return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i64gather_epi32
-  // CHECK: @llvm.x86.avx512.gather.qpi.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
   return _mm512_i64gather_epi32(__index, __addr, 2); 
 }
 
 __m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i64gather_epi32
-  // CHECK: @llvm.x86.avx512.gather.qpi.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
   return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i64gather_pd
-  // CHECK: @llvm.x86.avx512.gather.qpd.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
   return _mm512_i64gather_pd(__index, __addr, 2); 
 }
 
 __m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i64gather_pd
-  // CHECK: @llvm.x86.avx512.gather.qpd.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
   return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i64gather_epi64
-  // CHECK: @llvm.x86.avx512.gather.qpq.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
   return _mm512_i64gather_epi64(__index, __addr, 2); 
 }
 
 __m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i64gather_epi64
-  // CHECK: @llvm.x86.avx512.gather.qpq.512
+  // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
   return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i32gather_ps
-  // CHECK: @llvm.x86.avx512.gather.dps.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dps.512
   return _mm512_i32gather_ps(__index, __addr, 2); 
 }
 
 __m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i32gather_ps
-  // CHECK: @llvm.x86.avx512.gather.dps.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dps.512
   return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2); 
 }
 
 __m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i32gather_epi32
-  // CHECK: @llvm.x86.avx512.gather.dpi.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
   return _mm512_i32gather_epi32(__index, __addr, 2); 
 }
 
 __m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i32gather_epi32
-  // CHECK: @llvm.x86.avx512.gather.dpi.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
   return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i32gather_pd
-  // CHECK: @llvm.x86.avx512.gather.dpd.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
   return _mm512_i32gather_pd(__index, __addr, 2); 
 }
 
 __m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i32gather_pd
-  // CHECK: @llvm.x86.avx512.gather.dpd.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
   return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_i32gather_epi64
-  // CHECK: @llvm.x86.avx512.gather.dpq.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
   return _mm512_i32gather_epi64(__index, __addr, 2); 
 }
 
 __m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm512_mask_i32gather_epi64
-  // CHECK: @llvm.x86.avx512.gather.dpq.512
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
   return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=351390&r1=351389&r2=351390&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Wed Jan 16 14:34:33 2019
@@ -9280,97 +9280,97 @@ __m256 test_mm256_maskz_getmant_ps(__mma
 
 __m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mmask_i64gather_pd
-  // CHECK: @llvm.x86.avx512.gather3div2.df
+  // CHECK: @llvm.x86.avx512.mask.gather3div2.df
   return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mmask_i64gather_epi64
-  // CHECK: @llvm.x86.avx512.gather3div2.di
+  // CHECK: @llvm.x86.avx512.mask.gather3div2.di
   return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mmask_i64gather_pd
-  // CHECK: @llvm.x86.avx512.gather3div4.df
+  // CHECK: @llvm.x86.avx512.mask.gather3div4.df
   return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mmask_i64gather_epi64
-  // CHECK: @llvm.x86.avx512.gather3div4.di
+  // CHECK: @llvm.x86.avx512.mask.gather3div4.di
   return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mmask_i64gather_ps
-  // CHECK: @llvm.x86.avx512.gather3div4.sf
+  // CHECK: @llvm.x86.avx512.mask.gather3div4.sf
   return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mmask_i64gather_epi32
-  // CHECK: @llvm.x86.avx512.gather3div4.si
+  // CHECK: @llvm.x86.avx512.mask.gather3div4.si
   return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mmask_i64gather_ps
-  // CHECK: @llvm.x86.avx512.gather3div8.sf
+  // CHECK: @llvm.x86.avx512.mask.gather3div8.sf
   return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mmask_i64gather_epi32
-  // CHECK: @llvm.x86.avx512.gather3div8.si
+  // CHECK: @llvm.x86.avx512.mask.gather3div8.si
   return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mask_i32gather_pd
-  // CHECK: @llvm.x86.avx512.gather3siv2.df
+  // CHECK: @llvm.x86.avx512.mask.gather3siv2.df
   return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mask_i32gather_epi64
-  // CHECK: @llvm.x86.avx512.gather3siv2.di
+  // CHECK: @llvm.x86.avx512.mask.gather3siv2.di
   return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mask_i32gather_pd
-  // CHECK: @llvm.x86.avx512.gather3siv4.df
+  // CHECK: @llvm.x86.avx512.mask.gather3siv4.df
   return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mask_i32gather_epi64
-  // CHECK: @llvm.x86.avx512.gather3siv4.di
+  // CHECK: @llvm.x86.avx512.mask.gather3siv4.di
   return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mask_i32gather_ps
-  // CHECK: @llvm.x86.avx512.gather3siv4.sf
+  // CHECK: @llvm.x86.avx512.mask.gather3siv4.sf
   return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm_mask_i32gather_epi32
-  // CHECK: @llvm.x86.avx512.gather3siv4.si
+  // CHECK: @llvm.x86.avx512.mask.gather3siv4.si
   return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mask_i32gather_ps
-  // CHECK: @llvm.x86.avx512.gather3siv8.sf
+  // CHECK: @llvm.x86.avx512.mask.gather3siv8.sf
   return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); 
 }
 
 __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
   // CHECK-LABEL: @test_mm256_mask_i32gather_epi32
-  // CHECK: @llvm.x86.avx512.gather3siv8.si
+  // CHECK: @llvm.x86.avx512.mask.gather3siv8.si
   return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
 }
 




More information about the cfe-commits mailing list