[llvm] r286755 - [InstCombine][AVX-512] Teach InstCombineCalls to handle the new unmasked AVX-512 variable shift intrinsics.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 12 23:26:19 PST 2016


Author: ctopper
Date: Sun Nov 13 01:26:19 2016
New Revision: 286755

URL: http://llvm.org/viewvc/llvm-project?rev=286755&view=rev
Log:
[InstCombine][AVX-512] Teach InstCombineCalls to handle the new unmasked AVX-512 variable shift intrinsics.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=286755&r1=286754&r2=286755&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Sun Nov 13 01:26:19 2016
@@ -258,8 +258,7 @@ static Value *simplifyX86immShift(const
   bool ShiftLeft = false;
 
   switch (II.getIntrinsicID()) {
-  default:
-    return nullptr;
+  default: llvm_unreachable("Unexpected intrinsic!");
   case Intrinsic::x86_sse2_psra_d:
   case Intrinsic::x86_sse2_psra_w:
   case Intrinsic::x86_sse2_psrai_d:
@@ -393,10 +392,13 @@ static Value *simplifyX86varShift(const
   bool ShiftLeft = false;
 
   switch (II.getIntrinsicID()) {
-  default:
-    return nullptr;
+  default: llvm_unreachable("Unexpected intrinsic!");
   case Intrinsic::x86_avx2_psrav_d:
   case Intrinsic::x86_avx2_psrav_d_256:
+  case Intrinsic::x86_avx512_psrav_q_128:
+  case Intrinsic::x86_avx512_psrav_q_256:
+  case Intrinsic::x86_avx512_psrav_d_512:
+  case Intrinsic::x86_avx512_psrav_q_512:
     LogicalShift = false;
     ShiftLeft = false;
     break;
@@ -404,6 +406,8 @@ static Value *simplifyX86varShift(const
   case Intrinsic::x86_avx2_psrlv_d_256:
   case Intrinsic::x86_avx2_psrlv_q:
   case Intrinsic::x86_avx2_psrlv_q_256:
+  case Intrinsic::x86_avx512_psrlv_d_512:
+  case Intrinsic::x86_avx512_psrlv_q_512:
     LogicalShift = true;
     ShiftLeft = false;
     break;
@@ -411,6 +415,8 @@ static Value *simplifyX86varShift(const
   case Intrinsic::x86_avx2_psllv_d_256:
   case Intrinsic::x86_avx2_psllv_q:
   case Intrinsic::x86_avx2_psllv_q_256:
+  case Intrinsic::x86_avx512_psllv_d_512:
+  case Intrinsic::x86_avx512_psllv_q_512:
     LogicalShift = true;
     ShiftLeft = true;
     break;
@@ -1873,12 +1879,20 @@ Instruction *InstCombiner::visitCallInst
   case Intrinsic::x86_avx2_psllv_d_256:
   case Intrinsic::x86_avx2_psllv_q:
   case Intrinsic::x86_avx2_psllv_q_256:
+  case Intrinsic::x86_avx512_psllv_d_512:
+  case Intrinsic::x86_avx512_psllv_q_512:
   case Intrinsic::x86_avx2_psrav_d:
   case Intrinsic::x86_avx2_psrav_d_256:
+  case Intrinsic::x86_avx512_psrav_q_128:
+  case Intrinsic::x86_avx512_psrav_q_256:
+  case Intrinsic::x86_avx512_psrav_d_512:
+  case Intrinsic::x86_avx512_psrav_q_512:
   case Intrinsic::x86_avx2_psrlv_d:
   case Intrinsic::x86_avx2_psrlv_d_256:
   case Intrinsic::x86_avx2_psrlv_q:
   case Intrinsic::x86_avx2_psrlv_q_256:
+  case Intrinsic::x86_avx512_psrlv_d_512:
+  case Intrinsic::x86_avx512_psrlv_q_512:
     if (Value *V = simplifyX86varShift(*II, *Builder))
       return replaceInstUsesWith(*II, V);
     break;

Modified: llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll?rev=286755&r1=286754&r2=286755&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll Sun Nov 13 01:26:19 2016
@@ -1564,6 +1564,14 @@ define <8 x i32> @avx2_psrav_d_256_0(<8
   ret <8 x i32> %1
 }
 
+define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
 define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_var(
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
@@ -1582,6 +1590,15 @@ define <8 x i32> @avx2_psrav_d_256_var(<
   ret <8 x i32> %1
 }
 
+define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
 define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_allbig(
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
@@ -1600,6 +1617,15 @@ define <8 x i32> @avx2_psrav_d_256_allbi
   ret <8 x i32> %1
 }
 
+define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <16 x i32> %1
+}
+
 define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrav_d_128_undef(
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
@@ -1620,6 +1646,124 @@ define <8 x i32> @avx2_psrav_d_256_undef
   ret <8 x i32> %2
 }
 
+define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 undef, i64 63, i64 63>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 undef, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
+  %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
+  ret <8 x i64> %2
+}
+
 ;
 ; LSHR - Constant Per-Element Vector
 ;
@@ -1800,6 +1942,94 @@ define <4 x i64> @avx2_psrlv_q_256_undef
   ret <4 x i64> %2
 }
 
+define <16 x i32> @avx512_psrlv_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
+; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
+; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
+  ret <8 x i64> %2
+}
+
 ;
 ; SHL - Constant Per-Element Vector
 ;
@@ -1980,6 +2210,94 @@ define <4 x i64> @avx2_psllv_q_256_undef
   ret <4 x i64> %2
 }
 
+define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_allbig(
+; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_allbig(
+; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
+  ret <8 x i64> %2
+}
+
 ;
 ; Vector Demanded Bits
 ;
@@ -2710,15 +3028,23 @@ declare <8 x i16> @llvm.x86.sse2.psra.w(
 
 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
+declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
+declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
+declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1
 
 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
+declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1
 
 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
+declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1
 
 attributes #1 = { nounwind readnone }




More information about the llvm-commits mailing list