[llvm] r334368 - [X86] Add expandload and compresstore fast-isel tests for avx512f and avx512vl. Update existing tests for avx512vbmi2 to use target-independent intrinsics.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sun Jun 10 11:55:37 PDT 2018


Author: ctopper
Date: Sun Jun 10 11:55:37 2018
New Revision: 334368

URL: http://llvm.org/viewvc/llvm-project?rev=334368&view=rev
Log:
[X86] Add expandload and compresstore fast-isel tests for avx512f and avx512vl. Update existing tests for avx512vbmi2 to use target-independent intrinsics.

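For reference, the vbmi2 updates swap the x86-specific mask load/store intrinsics for the generic masked expandload/compressstore intrinsics, moving the pointer cast and the integer-mask-to-<N x i1> bitcast into the test IR. A minimal sketch of the pattern, taken from the 512-bit word compress-store case in the diff below:

  ; before: x86-specific intrinsic with raw i8* pointer and integer mask
  tail call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %__P, <32 x i16> %0, i32 %__U)

  ; after: target-independent intrinsic with typed pointer and <32 x i1> mask
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
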
Modified:
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=334368&r1=334367&r2=334368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Sun Jun 10 11:55:37 2018
@@ -5666,10 +5666,276 @@ entry:
   ret <2 x double> %0
 }
 
+define <8 x i64> @test_mm512_mask_expandloadu_epi64(<8 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_mask_expandloadu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_expandloadu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandq (%rsi), %zmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> %__W)
+  ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_maskz_expandloadu_epi64(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_maskz_expandloadu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_expandloadu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandq (%rsi), %zmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x i64> @llvm.masked.expandload.v8i64(i64* %0, <8 x i1> %1, <8 x i64> zeroinitializer)
+  ret <8 x i64> %2
+}
+
+define <8 x double> @test_mm512_mask_expandloadu_pd(<8 x double> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_mask_expandloadu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_expandloadu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandpd (%rsi), %zmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> %__W)
+  ret <8 x double> %2
+}
+
+define <8 x double> @test_mm512_maskz_expandloadu_pd(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_maskz_expandloadu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_expandloadu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandpd (%rsi), %zmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x double> @llvm.masked.expandload.v8f64(double* %0, <8 x i1> %1, <8 x double> zeroinitializer)
+  ret <8 x double> %2
+}
+
+define <8 x i64> @test_mm512_mask_expandloadu_epi32(<8 x i64> %__W, i16 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_mask_expandloadu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_expandloadu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandd (%rsi), %zmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__W to <16 x i32>
+  %1 = bitcast i8* %__P to i32*
+  %2 = bitcast i16 %__U to <16 x i1>
+  %3 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %1, <16 x i1> %2, <16 x i32> %0) #11
+  %4 = bitcast <16 x i32> %3 to <8 x i64>
+  ret <8 x i64> %4
+}
+
+define <8 x i64> @test_mm512_maskz_expandloadu_epi32(i16 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_maskz_expandloadu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_expandloadu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandd (%rsi), %zmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i32*
+  %1 = bitcast i16 %__U to <16 x i1>
+  %2 = tail call <16 x i32> @llvm.masked.expandload.v16i32(i32* %0, <16 x i1> %1, <16 x i32> zeroinitializer)
+  %3 = bitcast <16 x i32> %2 to <8 x i64>
+  ret <8 x i64> %3
+}
+
+define <16 x float> @test_mm512_mask_expandloadu_ps(<16 x float> %__W, i16 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_mask_expandloadu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_expandloadu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandps (%rsi), %zmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i16 %__U to <16 x i1>
+  %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> %__W) #11
+  ret <16 x float> %2
+}
+
+define <16 x float> @test_mm512_maskz_expandloadu_ps(i16 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm512_maskz_expandloadu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_maskz_expandloadu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandps (%rsi), %zmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i16 %__U to <16 x i1>
+  %2 = tail call <16 x float> @llvm.masked.expandload.v16f32(float* %0, <16 x i1> %1, <16 x float> zeroinitializer)
+  ret <16 x float> %2
+}
+
+define void @test_mm512_mask_compressstoreu_pd(i8* %__P, i8 zeroext %__U, <8 x double> %__A) {
+; X86-LABEL: test_mm512_mask_compressstoreu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vcompresspd %zmm0, (%ecx) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_compressstoreu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  tail call void @llvm.masked.compressstore.v8f64(<8 x double> %__A, double* %0, <8 x i1> %1)
+  ret void
+}
+
+define void @test_mm512_mask_compressstoreu_epi64(i8* %__P, i8 zeroext %__U, <8 x i64> %__A) {
+; X86-LABEL: test_mm512_mask_compressstoreu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vpcompressq %zmm0, (%ecx) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_compressstoreu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  tail call void @llvm.masked.compressstore.v8i64(<8 x i64> %__A, i64* %0, <8 x i1> %1)
+  ret void
+}
+
+define void @test_mm512_mask_compressstoreu_ps(i8* %__P, i16 zeroext %__U, <16 x float> %__A) {
+; X86-LABEL: test_mm512_mask_compressstoreu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_compressstoreu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i16 %__U to <16 x i1>
+  tail call void @llvm.masked.compressstore.v16f32(<16 x float> %__A, float* %0, <16 x i1> %1)
+  ret void
+}
+
+define void @test_mm512_mask_compressstoreu_epi32(i8* %__P, i16 zeroext %__U, <8 x i64> %__A) {
+; X86-LABEL: test_mm512_mask_compressstoreu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm512_mask_compressstoreu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vpcompressd %zmm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <8 x i64> %__A to <16 x i32>
+  %1 = bitcast i8* %__P to i32*
+  %2 = bitcast i16 %__U to <16 x i1>
+  tail call void @llvm.masked.compressstore.v16i32(<16 x i32> %0, i32* %1, <16 x i1> %2)
+  ret void
+}
+
 declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) #9
 declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) #9
 declare float @llvm.fma.f32(float, float, float) #9
 declare double @llvm.fma.f64(double, double, double) #9
+declare <8 x i64> @llvm.masked.expandload.v8i64(i64*, <8 x i1>, <8 x i64>)
+declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
+declare <16 x i32> @llvm.masked.expandload.v16i32(i32*, <16 x i1>, <16 x i32>) #10
+declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
+declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
+declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)
 
 !0 = !{i32 1}
 

Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll?rev=334368&r1=334367&r2=334368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll Sun Jun 10 11:55:37 2018
@@ -103,7 +103,9 @@ define void @test_mm512_mask_compresssto
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <8 x i64> %__D to <32 x i16>
-  tail call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %__P, <32 x i16> %0, i32 %__U)
+  %1 = bitcast i8* %__P to i16*
+  %2 = bitcast i32 %__U to <32 x i1>
+  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
   ret void
 }
 
@@ -126,7 +128,8 @@ define void @test_mm512_mask_compresssto
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <8 x i64> %__D to <64 x i8>
-  tail call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %__P, <64 x i8> %0, i64 %__U)
+  %1 = bitcast i64 %__U to <64 x i1>
+  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
   ret void
 }
 
@@ -227,9 +230,11 @@ define <8 x i64> @test_mm512_mask_expand
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <8 x i64> %__S to <32 x i16>
-  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %__P, <32 x i16> %0, i32 %__U)
-  %2 = bitcast <32 x i16> %1 to <8 x i64>
-  ret <8 x i64> %2
+  %1 = bitcast i8* %__P to i16*
+  %2 = bitcast i32 %__U to <32 x i1>
+  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
+  %4 = bitcast <32 x i16> %3 to <8 x i64>
+  ret <8 x i64> %4
 }
 
 define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {
@@ -246,9 +251,11 @@ define <8 x i64> @test_mm512_maskz_expan
 ; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %__P, <32 x i16> zeroinitializer, i32 %__U)
-  %1 = bitcast <32 x i16> %0 to <8 x i64>
-  ret <8 x i64> %1
+  %0 = bitcast i8* %__P to i16*
+  %1 = bitcast i32 %__U to <32 x i1>
+  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
+  %3 = bitcast <32 x i16> %2 to <8 x i64>
+  ret <8 x i64> %3
 }
 
 define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {
@@ -268,9 +275,10 @@ define <8 x i64> @test_mm512_mask_expand
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <8 x i64> %__S to <64 x i8>
-  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %__P, <64 x i8> %0, i64 %__U)
-  %2 = bitcast <64 x i8> %1 to <8 x i64>
-  ret <8 x i64> %2
+  %1 = bitcast i64 %__U to <64 x i1>
+  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
+  %3 = bitcast <64 x i8> %2 to <8 x i64>
+  ret <8 x i64> %3
 }
 
 define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {
@@ -289,9 +297,10 @@ define <8 x i64> @test_mm512_maskz_expan
 ; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %__P, <64 x i8> zeroinitializer, i64 %__U)
-  %1 = bitcast <64 x i8> %0 to <8 x i64>
-  ret <8 x i64> %1
+  %0 = bitcast i64 %__U to <64 x i1>
+  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
+  %2 = bitcast <64 x i8> %1 to <8 x i64>
+  ret <8 x i64> %2
 }
 
 define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
@@ -932,12 +941,12 @@ entry:
 
 declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
 declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
-declare void @llvm.x86.avx512.mask.compress.store.w.512(i8*, <32 x i16>, i32)
-declare void @llvm.x86.avx512.mask.compress.store.b.512(i8*, <64 x i8>, i64)
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
 declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
 declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
-declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8*, <32 x i16>, i32)
-declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8*, <64 x i8>, i64)
+declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
+declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
 declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
 declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
 declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

Modified: llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll?rev=334368&r1=334367&r2=334368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll Sun Jun 10 11:55:37 2018
@@ -100,7 +100,9 @@ define void @test_mm_mask_compressstoreu
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <2 x i64> %__D to <8 x i16>
-  tail call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %__P, <8 x i16> %0, i8 %__U)
+  %1 = bitcast i8* %__P to i16*
+  %2 = bitcast i8 %__U to <8 x i1>
+  tail call void @llvm.masked.compressstore.v8i16(<8 x i16> %0, i16* %1, <8 x i1> %2)
   ret void
 }
 
@@ -119,7 +121,8 @@ define void @test_mm_mask_compressstoreu
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <2 x i64> %__D to <16 x i8>
-  tail call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %__P, <16 x i8> %0, i16 %__U)
+  %1 = bitcast i16 %__U to <16 x i1>
+  tail call void @llvm.masked.compressstore.v16i8(<16 x i8> %0, i8* %__P, <16 x i1> %1)
   ret void
 }
 
@@ -219,9 +222,11 @@ define <2 x i64> @test_mm_mask_expandloa
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <2 x i64> %__S to <8 x i16>
-  %1 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %__P, <8 x i16> %0, i8 %__U)
-  %2 = bitcast <8 x i16> %1 to <2 x i64>
-  ret <2 x i64> %2
+  %1 = bitcast i8* %__P to i16*
+  %2 = bitcast i8 %__U to <8 x i1>
+  %3 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %0)
+  %4 = bitcast <8 x i16> %3 to <2 x i64>
+  ret <2 x i64> %4
 }
 
 define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, i8* readonly %__P) {
@@ -239,9 +244,11 @@ define <2 x i64> @test_mm_maskz_expandlo
 ; X64-NEXT:    vpexpandw (%rsi), %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %__P, <8 x i16> zeroinitializer, i8 %__U)
-  %1 = bitcast <8 x i16> %0 to <2 x i64>
-  ret <2 x i64> %1
+  %0 = bitcast i8* %__P to i16*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x i16> @llvm.masked.expandload.v8i16(i16* %0, <8 x i1> %1, <8 x i16> zeroinitializer)
+  %3 = bitcast <8 x i16> %2 to <2 x i64>
+  ret <2 x i64> %3
 }
 
 define <2 x i64> @test_mm_mask_expandloadu_epi8(<2 x i64> %__S, i16 zeroext %__U, i8* readonly %__P) {
@@ -259,9 +266,10 @@ define <2 x i64> @test_mm_mask_expandloa
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <2 x i64> %__S to <16 x i8>
-  %1 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %__P, <16 x i8> %0, i16 %__U)
-  %2 = bitcast <16 x i8> %1 to <2 x i64>
-  ret <2 x i64> %2
+  %1 = bitcast i16 %__U to <16 x i1>
+  %2 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %1, <16 x i8> %0)
+  %3 = bitcast <16 x i8> %2 to <2 x i64>
+  ret <2 x i64> %3
 }
 
 define <2 x i64> @test_mm_maskz_expandloadu_epi8(i16 zeroext %__U, i8* readonly %__P) {
@@ -278,9 +286,10 @@ define <2 x i64> @test_mm_maskz_expandlo
 ; X64-NEXT:    vpexpandb (%rsi), %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %__P, <16 x i8> zeroinitializer, i16 %__U)
-  %1 = bitcast <16 x i8> %0 to <2 x i64>
-  ret <2 x i64> %1
+  %0 = bitcast i16 %__U to <16 x i1>
+  %1 = tail call <16 x i8> @llvm.masked.expandload.v16i8(i8* %__P, <16 x i1> %0, <16 x i8> zeroinitializer)
+  %2 = bitcast <16 x i8> %1 to <2 x i64>
+  ret <2 x i64> %2
 }
 
 define <4 x i64> @test_mm256_mask_compress_epi16(<4 x i64> %__S, i16 zeroext %__U, <4 x i64> %__D) {
@@ -378,7 +387,9 @@ define void @test_mm256_mask_compresssto
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <4 x i64> %__D to <16 x i16>
-  tail call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %__P, <16 x i16> %0, i16 %__U)
+  %1 = bitcast i8* %__P to i16*
+  %2 = bitcast i16 %__U to <16 x i1>
+  tail call void @llvm.masked.compressstore.v16i16(<16 x i16> %0, i16* %1, <16 x i1> %2)
   ret void
 }
 
@@ -399,7 +410,8 @@ define void @test_mm256_mask_compresssto
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <4 x i64> %__D to <32 x i8>
-  tail call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %__P, <32 x i8> %0, i32 %__U)
+  %1 = bitcast i32 %__U to <32 x i1>
+  tail call void @llvm.masked.compressstore.v32i8(<32 x i8> %0, i8* %__P, <32 x i1> %1)
   ret void
 }
 
@@ -496,9 +508,11 @@ define <4 x i64> @test_mm256_mask_expand
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <4 x i64> %__S to <16 x i16>
-  %1 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %__P, <16 x i16> %0, i16 %__U)
-  %2 = bitcast <16 x i16> %1 to <4 x i64>
-  ret <4 x i64> %2
+  %1 = bitcast i8* %__P to i16*
+  %2 = bitcast i16 %__U to <16 x i1>
+  %3 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %0)
+  %4 = bitcast <16 x i16> %3 to <4 x i64>
+  ret <4 x i64> %4
 }
 
 define <4 x i64> @test_mm256_maskz_expandloadu_epi16(i16 zeroext %__U, i8* readonly %__P) {
@@ -515,9 +529,11 @@ define <4 x i64> @test_mm256_maskz_expan
 ; X64-NEXT:    vpexpandw (%rsi), %ymm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %__P, <16 x i16> zeroinitializer, i16 %__U)
-  %1 = bitcast <16 x i16> %0 to <4 x i64>
-  ret <4 x i64> %1
+  %0 = bitcast i8* %__P to i16*
+  %1 = bitcast i16 %__U to <16 x i1>
+  %2 = tail call <16 x i16> @llvm.masked.expandload.v16i16(i16* %0, <16 x i1> %1, <16 x i16> zeroinitializer)
+  %3 = bitcast <16 x i16> %2 to <4 x i64>
+  ret <4 x i64> %3
 }
 
 define <4 x i64> @test_mm256_mask_expandloadu_epi8(<4 x i64> %__S, i32 %__U, i8* readonly %__P) {
@@ -535,9 +551,10 @@ define <4 x i64> @test_mm256_mask_expand
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast <4 x i64> %__S to <32 x i8>
-  %1 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %__P, <32 x i8> %0, i32 %__U)
-  %2 = bitcast <32 x i8> %1 to <4 x i64>
-  ret <4 x i64> %2
+  %1 = bitcast i32 %__U to <32 x i1>
+  %2 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %1, <32 x i8> %0)
+  %3 = bitcast <32 x i8> %2 to <4 x i64>
+  ret <4 x i64> %3
 }
 
 define <4 x i64> @test_mm256_maskz_expandloadu_epi8(i32 %__U, i8* readonly %__P) {
@@ -554,9 +571,10 @@ define <4 x i64> @test_mm256_maskz_expan
 ; X64-NEXT:    vpexpandb (%rsi), %ymm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %__P, <32 x i8> zeroinitializer, i32 %__U)
-  %1 = bitcast <32 x i8> %0 to <4 x i64>
-  ret <4 x i64> %1
+  %0 = bitcast i32 %__U to <32 x i1>
+  %1 = tail call <32 x i8> @llvm.masked.expandload.v32i8(i8* %__P, <32 x i1> %0, <32 x i8> zeroinitializer)
+  %2 = bitcast <32 x i8> %1 to <4 x i64>
+  ret <4 x i64> %2
 }
 
 define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
@@ -1857,20 +1875,20 @@ entry:
 
 declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
 declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
-declare void @llvm.x86.avx512.mask.compress.store.w.128(i8*, <8 x i16>, i8)
-declare void @llvm.x86.avx512.mask.compress.store.b.128(i8*, <16 x i8>, i16)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>)
 declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
 declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
-declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8*, <8 x i16>, i8)
-declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8*, <16 x i8>, i16)
+declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>)
+declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>)
 declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
 declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
-declare void @llvm.x86.avx512.mask.compress.store.w.256(i8*, <16 x i16>, i16)
-declare void @llvm.x86.avx512.mask.compress.store.b.256(i8*, <32 x i8>, i32)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>)
 declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
 declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
-declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8*, <16 x i16>, i16)
-declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8*, <32 x i8>, i32)
+declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>)
+declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>)
 declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
 declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
 declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=334368&r1=334367&r2=334368&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Sun Jun 10 11:55:37 2018
@@ -5831,6 +5831,545 @@ entry:
   ret <8 x float> %2
 }
 
+define <2 x double> @test_mm_mask_expandloadu_pd(<2 x double> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_mask_expandloadu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_expandloadu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandpd (%rsi), %xmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = tail call <2 x double> @llvm.masked.expandload.v2f64(double* %0, <2 x i1> %extract.i, <2 x double> %__W)
+  ret <2 x double> %2
+}
+
+define <2 x double> @test_mm_maskz_expandloadu_pd(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_maskz_expandloadu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_maskz_expandloadu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandpd (%rsi), %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = tail call <2 x double> @llvm.masked.expandload.v2f64(double* %0, <2 x i1> %extract.i, <2 x double> zeroinitializer)
+  ret <2 x double> %2
+}
+
+define <4 x double> @test_mm256_mask_expandloadu_pd(<4 x double> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_mask_expandloadu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_expandloadu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandpd (%rsi), %ymm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x double> @llvm.masked.expandload.v4f64(double* %0, <4 x i1> %extract.i, <4 x double> %__W)
+  ret <4 x double> %2
+}
+
+define <4 x double> @test_mm256_maskz_expandloadu_pd(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_maskz_expandloadu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_maskz_expandloadu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandpd (%rsi), %ymm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x double> @llvm.masked.expandload.v4f64(double* %0, <4 x i1> %extract.i, <4 x double> zeroinitializer)
+  ret <4 x double> %2
+}
+
+define <2 x i64> @test_mm_mask_expandloadu_epi64(<2 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_mask_expandloadu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_expandloadu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandq (%rsi), %xmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = tail call <2 x i64> @llvm.masked.expandload.v2i64(i64* %0, <2 x i1> %extract.i, <2 x i64> %__W) #10
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm_maskz_expandloadu_epi64(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_maskz_expandloadu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_maskz_expandloadu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandq (%rsi), %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %2 = tail call <2 x i64> @llvm.masked.expandload.v2i64(i64* %0, <2 x i1> %extract.i, <2 x i64> zeroinitializer)
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm256_mask_expandloadu_epi64(<4 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_mask_expandloadu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_expandloadu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandq (%rsi), %ymm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x i64> @llvm.masked.expandload.v4i64(i64* %0, <4 x i1> %extract.i, <4 x i64> %__W) #10
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @test_mm256_maskz_expandloadu_epi64(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_maskz_expandloadu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_maskz_expandloadu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandq (%rsi), %ymm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x i64> @llvm.masked.expandload.v4i64(i64* %0, <4 x i1> %extract.i, <4 x i64> zeroinitializer)
+  ret <4 x i64> %2
+}
+
+define <4 x float> @test_mm_mask_expandloadu_ps(<4 x float> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_mask_expandloadu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_expandloadu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandps (%rsi), %xmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x float> @llvm.masked.expandload.v4f32(float* %0, <4 x i1> %extract.i, <4 x float> %__W)
+  ret <4 x float> %2
+}
+
+define <4 x float> @test_mm_maskz_expandloadu_ps(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_maskz_expandloadu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_maskz_expandloadu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandps (%rsi), %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x float> @llvm.masked.expandload.v4f32(float* %0, <4 x i1> %extract.i, <4 x float> zeroinitializer)
+  ret <4 x float> %2
+}
+
+define <8 x float> @test_mm256_mask_expandloadu_ps(<8 x float> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_mask_expandloadu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_expandloadu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandps (%rsi), %ymm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x float> @llvm.masked.expandload.v8f32(float* %0, <8 x i1> %1, <8 x float> %__W)
+  ret <8 x float> %2
+}
+
+define <8 x float> @test_mm256_maskz_expandloadu_ps(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_maskz_expandloadu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_maskz_expandloadu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vexpandps (%rsi), %ymm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x float> @llvm.masked.expandload.v8f32(float* %0, <8 x i1> %1, <8 x float> zeroinitializer)
+  ret <8 x float> %2
+}
+
+define <2 x i64> @test_mm_mask_expandloadu_epi32(<2 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_mask_expandloadu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_expandloadu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandd (%rsi), %xmm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <2 x i64> %__W to <4 x i32>
+  %1 = bitcast i8* %__P to i32*
+  %2 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = tail call <4 x i32> @llvm.masked.expandload.v4i32(i32* %1, <4 x i1> %extract.i, <4 x i32> %0)
+  %4 = bitcast <4 x i32> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_expandloadu_epi32(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm_maskz_expandloadu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_maskz_expandloadu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandd (%rsi), %xmm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i32*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <4 x i32> @llvm.masked.expandload.v4i32(i32* %0, <4 x i1> %extract.i, <4 x i32> zeroinitializer)
+  %3 = bitcast <4 x i32> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_mask_expandloadu_epi32(<4 x i64> %__W, i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_mask_expandloadu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_expandloadu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandd (%rsi), %ymm0 {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <4 x i64> %__W to <8 x i32>
+  %1 = bitcast i8* %__P to i32*
+  %2 = bitcast i8 %__U to <8 x i1>
+  %3 = tail call <8 x i32> @llvm.masked.expandload.v8i32(i32* %1, <8 x i1> %2, <8 x i32> %0)
+  %4 = bitcast <8 x i32> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_expandloadu_epi32(i8 zeroext %__U, i8* readonly %__P) {
+; X86-LABEL: test_mm256_maskz_expandloadu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    kmovw %ecx, %k1
+; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} {z}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_maskz_expandloadu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %edi, %k1
+; X64-NEXT:    vpexpandd (%rsi), %ymm0 {%k1} {z}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i32*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %2 = tail call <8 x i32> @llvm.masked.expandload.v8i32(i32* %0, <8 x i1> %1, <8 x i32> zeroinitializer)
+  %3 = bitcast <8 x i32> %2 to <4 x i64>
+  ret <4 x i64> %3
+}
+
+define void @test_mm_mask_compressstoreu_pd(i8* %__P, i8 zeroext %__U, <2 x double> %__A) {
+; X86-LABEL: test_mm_mask_compressstoreu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vcompresspd %xmm0, (%ecx) {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_compressstoreu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  tail call void @llvm.masked.compressstore.v2f64(<2 x double> %__A, double* %0, <2 x i1> %extract.i)
+  ret void
+}
+
+define void @test_mm256_mask_compressstoreu_pd(i8* %__P, i8 zeroext %__U, <4 x double> %__A) {
+; X86-LABEL: test_mm256_mask_compressstoreu_pd:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vcompresspd %ymm0, (%ecx) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_compressstoreu_pd:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to double*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  tail call void @llvm.masked.compressstore.v4f64(<4 x double> %__A, double* %0, <4 x i1> %extract.i)
+  ret void
+}
+
+define void @test_mm_mask_compressstoreu_epi64(i8* %__P, i8 zeroext %__U, <2 x i64> %__A) {
+; X86-LABEL: test_mm_mask_compressstoreu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vpcompressq %xmm0, (%ecx) {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_compressstoreu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  tail call void @llvm.masked.compressstore.v2i64(<2 x i64> %__A, i64* %0, <2 x i1> %extract.i)
+  ret void
+}
+
+define void @test_mm256_mask_compressstoreu_epi64(i8* %__P, i8 zeroext %__U, <4 x i64> %__A) {
+; X86-LABEL: test_mm256_mask_compressstoreu_epi64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vpcompressq %ymm0, (%ecx) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_compressstoreu_epi64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to i64*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  tail call void @llvm.masked.compressstore.v4i64(<4 x i64> %__A, i64* %0, <4 x i1> %extract.i)
+  ret void
+}
+
+define void @test_mm_mask_compressstoreu_ps(i8* %__P, i8 zeroext %__U, <4 x float> %__A) {
+; X86-LABEL: test_mm_mask_compressstoreu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vcompressps %xmm0, (%ecx) {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_compressstoreu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  tail call void @llvm.masked.compressstore.v4f32(<4 x float> %__A, float* %0, <4 x i1> %extract.i)
+  ret void
+}
+
+define void @test_mm256_mask_compressstoreu_ps(i8* %__P, i8 zeroext %__U, <8 x float> %__A) {
+; X86-LABEL: test_mm256_mask_compressstoreu_ps:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vcompressps %ymm0, (%ecx) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_compressstoreu_ps:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast i8* %__P to float*
+  %1 = bitcast i8 %__U to <8 x i1>
+  tail call void @llvm.masked.compressstore.v8f32(<8 x float> %__A, float* %0, <8 x i1> %1)
+  ret void
+}
+
+define void @test_mm_mask_compressstoreu_epi32(i8* %__P, i8 zeroext %__U, <2 x i64> %__A) {
+; X86-LABEL: test_mm_mask_compressstoreu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vpcompressd %xmm0, (%ecx) {%k1}
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_mask_compressstoreu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1}
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <2 x i64> %__A to <4 x i32>
+  %1 = bitcast i8* %__P to i32*
+  %2 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  tail call void @llvm.masked.compressstore.v4i32(<4 x i32> %0, i32* %1, <4 x i1> %extract.i)
+  ret void
+}
+
+define void @test_mm256_mask_compressstoreu_epi32(i8* %__P, i8 zeroext %__U, <4 x i64> %__A) {
+; X86-LABEL: test_mm256_mask_compressstoreu_epi32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vpcompressd %ymm0, (%ecx) {%k1}
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm256_mask_compressstoreu_epi32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    kmovw %esi, %k1
+; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1}
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+entry:
+  %0 = bitcast <4 x i64> %__A to <8 x i32>
+  %1 = bitcast i8* %__P to i32*
+  %2 = bitcast i8 %__U to <8 x i1>
+  tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %0, i32* %1, <8 x i1> %2) #10
+  ret void
+}
+
+
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #8
 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #8
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #8
@@ -5863,5 +6402,21 @@ declare <4 x float> @llvm.x86.avx512.vpe
 declare <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>)
 declare <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>)
 declare <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>)
+declare <2 x double> @llvm.masked.expandload.v2f64(double*, <2 x i1>, <2 x double>)
+declare <4 x double> @llvm.masked.expandload.v4f64(double*, <4 x i1>, <4 x double>)
+declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)
+declare <4 x i64> @llvm.masked.expandload.v4i64(i64*, <4 x i1>, <4 x i64>)
+declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
+declare <8 x float> @llvm.masked.expandload.v8f32(float*, <8 x i1>, <8 x float>)
+declare <4 x i32> @llvm.masked.expandload.v4i32(i32*, <4 x i1>, <4 x i32>)
+declare <8 x i32> @llvm.masked.expandload.v8i32(i32*, <8 x i1>, <8 x i32>)
+declare void @llvm.masked.compressstore.v2f64(<2 x double>, double*, <2 x i1>)
+declare void @llvm.masked.compressstore.v4f64(<4 x double>, double*, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
+declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
+declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
 
 !0 = !{i32 1}



