<div dir="ltr">FYI, reverting due to crashes. Updated review thread and filed <a href="http://llvm.org/PR37260">http://llvm.org/PR37260</a> with testcase.</div><br><div class="gmail_quote"><div dir="ltr">On Thu, Apr 19, 2018 at 5:16 AM Alexander Ivchenko via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: aivchenk<br>

Date: Thu Apr 19 05:13:30 2018<br>

New Revision: 330322<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=330322&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=330322&view=rev</a><br>

Log:<br>

Lowering x86 adds/addus/subs/subus intrinsics (llvm part)<br>

<br>

This is the patch that lowers x86 intrinsics to native IR<br>

in order to enable optimizations. The patch also includes folding<br>

of previously missing saturation patterns so that IR emits the same<br>

machine instructions as the intrinsics.<br>

<br>

Patch by tkrupa<br>

<br>

Differential Revision: <a href="https://reviews.llvm.org/D44785" rel="noreferrer" target="_blank">https://reviews.llvm.org/D44785</a><br>

<br>

Added:<br>

    llvm/trunk/test/CodeGen/X86/vector-arith-sat.ll<br>

Modified:<br>

    llvm/trunk/include/llvm/IR/IntrinsicsX86.td<br>

    llvm/trunk/lib/IR/AutoUpgrade.cpp<br>

    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>

    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h<br>

    llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll<br>

    llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll<br>

    llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll<br>

    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll<br>

    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll<br>

    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll<br>

    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll<br>

    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll<br>

    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll<br>

    llvm/trunk/test/CodeGen/X86/sse2-intrinsics-x86.ll<br>

    llvm/trunk/test/CodeGen/X86/sse2-schedule.ll<br>

    llvm/trunk/test/Instrumentation/MemorySanitizer/msan_x86intrinsics.ll<br>

<br>

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)<br>

+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Thu Apr 19 05:13:30 2018<br>

@@ -378,30 +378,6 @@ let TargetPrefix = "x86" in {  // All in<br>

<br>

 // Integer arithmetic ops.<br>

 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".<br>

-  def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">,<br>

-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,<br>

-                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">,<br>

-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,<br>

-                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">,<br>

-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,<br>

-                         llvm_v16i8_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">,<br>

-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,<br>

-                         llvm_v8i16_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">,<br>

-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,<br>

-                         llvm_v16i8_ty], [IntrNoMem]>;<br>

-  def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">,<br>

-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,<br>

-                         llvm_v8i16_ty], [IntrNoMem]>;<br>

-  def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">,<br>

-              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,<br>

-                         llvm_v16i8_ty], [IntrNoMem]>;<br>

-  def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">,<br>

-              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,<br>

-                         llvm_v8i16_ty], [IntrNoMem]>;<br>

   def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,<br>

               Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,<br>

                          llvm_v8i16_ty], [IntrNoMem, Commutative]>;<br>

@@ -1627,30 +1603,6 @@ let TargetPrefix = "x86" in {  // All in<br>

<br>

 // Integer arithmetic ops.<br>

 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".<br>

-  def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,<br>

-              Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,<br>

-                         llvm_v32i8_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,<br>

-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,<br>

-                         llvm_v16i16_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,<br>

-              Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,<br>

-                         llvm_v32i8_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,<br>

-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,<br>

-                         llvm_v16i16_ty], [IntrNoMem, Commutative]>;<br>

-  def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,<br>

-              Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,<br>

-                         llvm_v32i8_ty], [IntrNoMem]>;<br>

-  def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,<br>

-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,<br>

-                         llvm_v16i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,<br>

-              Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,<br>

-                         llvm_v32i8_ty], [IntrNoMem]>;<br>

-  def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,<br>

-              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,<br>

-                         llvm_v16i16_ty], [IntrNoMem]>;<br>

   def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,<br>

               Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,<br>

                          llvm_v16i16_ty], [IntrNoMem, Commutative]>;<br>

@@ -4695,78 +4647,6 @@ let TargetPrefix = "x86" in {  // All in<br>

 }<br>

 // Integer arithmetic ops<br>

 let TargetPrefix = "x86" in {<br>

-  def int_x86_avx512_mask_padds_b_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,<br>

-                     llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_padds_b_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,<br>

-                     llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_padds_b_512 : GCCBuiltin<"__builtin_ia32_paddsb512_mask">,<br>

-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,<br>

-                     llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_padds_w_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,<br>

-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_padds_w_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,<br>

-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_padds_w_512 : GCCBuiltin<"__builtin_ia32_paddsw512_mask">,<br>

-          Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,<br>

-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_paddus_b_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,<br>

-                     llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_paddus_b_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,<br>

-                     llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_paddus_b_512 : GCCBuiltin<"__builtin_ia32_paddusb512_mask">,<br>

-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,<br>

-                     llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_paddus_w_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,<br>

-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_paddus_w_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,<br>

-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,<br>

-          Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,<br>

-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubs_b_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,<br>

-                     llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubs_b_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,<br>

-                     llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubs_b_512 : GCCBuiltin<"__builtin_ia32_psubsb512_mask">,<br>

-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,<br>

-                     llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubs_w_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,<br>

-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubs_w_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,<br>

-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubs_w_512 : GCCBuiltin<"__builtin_ia32_psubsw512_mask">,<br>

-          Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,<br>

-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubus_b_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,<br>

-                     llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubus_b_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,<br>

-                     llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubus_b_512 : GCCBuiltin<"__builtin_ia32_psubusb512_mask">,<br>

-          Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,<br>

-                     llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubus_w_128 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,<br>

-                     llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubus_w_256 : // FIXME: remove this intrinsic<br>

-          Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,<br>

-                     llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;<br>

-  def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,<br>

-          Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,<br>

-                     llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;<br>

   def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,<br>

               Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,<br>

                          llvm_v32i16_ty], [IntrNoMem, Commutative]>;<br>

<br>

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)<br>

+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Thu Apr 19 05:13:30 2018<br>

@@ -84,7 +84,19 @@ static bool ShouldUpgradeX86Intrinsic(Fu<br>

   // like to use this information to remove upgrade code for some older<br>

   // intrinsics. It is currently undecided how we will determine that future<br>

   // point.<br>

-  if (Name=="ssse3.pabs.b.128" || // Added in 6.0<br>

+  if (Name.startswith("sse2.padds") || // Added in 7.0<br>

+      Name.startswith("sse2.paddus") || // Added in 7.0<br>

+      Name.startswith("sse2.psubs") || // Added in 7.0<br>

+      Name.startswith("sse2.psubus") || // Added in 7.0<br>

+      Name.startswith("avx2.padds") || // Added in 7.0<br>

+      Name.startswith("avx2.paddus") || // Added in 7.0<br>

+      Name.startswith("avx2.psubs") || // Added in 7.0<br>

+      Name.startswith("avx2.psubus") || // Added in 7.0<br>

+      Name.startswith("avx512.mask.padds") || // Added in 7.0<br>

+      Name.startswith("avx512.mask.paddus") || // Added in 7.0<br>

+      Name.startswith("avx512.mask.psubs") || // Added in 7.0<br>

+      Name.startswith("avx512.mask.psubus") || // Added in 7.0<br>

+      Name=="ssse3.pabs.b.128" || // Added in 6.0<br>

       Name=="ssse3.pabs.w.128" || // Added in 6.0<br>

       Name=="ssse3.pabs.d.128" || // Added in 6.0<br>

       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0<br>

@@ -845,6 +857,77 @@ static Value *UpgradeX86ALIGNIntrinsics(<br>

   return EmitX86Select(Builder, Mask, Align, Passthru);<br>

 }<br>

<br>

+static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,<br>

+                                            bool IsSigned, bool IsAddition) {<br>

+  // Get elements.<br>

+  Value *Op0 = CI.getArgOperand(0);<br>

+  Value *Op1 = CI.getArgOperand(1);<br>

+<br>

+  // Extend elements.<br>

+  Type *ResultType = CI.getType();<br>

+  unsigned NumElts = ResultType->getVectorNumElements();<br>

+<br>

+  Value *Res;<br>

+  if (!IsAddition && !IsSigned) {<br>

+    Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);<br>

+    Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);<br>

+    Res = Builder.CreateSub(Select, Op1);<br>

+  } else {<br>

+    Type *EltType = ResultType->getVectorElementType();<br>

+    Type *ExtEltType = EltType == Builder.getInt8Ty() ? Builder.getInt16Ty()<br>

+                                                      : Builder.getInt32Ty();<br>

+    Type *ExtVT = VectorType::get(ExtEltType, NumElts);<br>

+    Op0 = IsSigned ? Builder.CreateSExt(Op0, ExtVT)<br>

+                   : Builder.CreateZExt(Op0, ExtVT);<br>

+    Op1 = IsSigned ? Builder.CreateSExt(Op1, ExtVT)<br>

+                   : Builder.CreateZExt(Op1, ExtVT);<br>

+<br>

+    // Perform addition/subtraction.<br>

+    Res = IsAddition ? Builder.CreateAdd(Op0, Op1)<br>

+                     : Builder.CreateSub(Op0, Op1);<br>

+<br>

+    // Create a vector of maximum values of the non-extended type<br>

+    // (if overflow occurs, the result is saturated to that value).<br>

+    unsigned EltSizeInBits = EltType->getPrimitiveSizeInBits();<br>

+    APInt MaxInt = IsSigned ? APInt::getSignedMaxValue(EltSizeInBits)<br>

+                            : APInt::getMaxValue(EltSizeInBits);<br>

+    Value *MaxVec = ConstantInt::get(ResultType, MaxInt);<br>

+    // Extend so that it can be compared to result of add/sub.<br>

+    MaxVec = IsSigned ? Builder.CreateSExt(MaxVec, ExtVT)<br>

+                      : Builder.CreateZExt(MaxVec, ExtVT);<br>

+<br>

+    // Saturate overflow.<br>

+    ICmpInst::Predicate Pred = IsSigned ? ICmpInst::ICMP_SLE<br>

+                                        : ICmpInst::ICMP_ULE;<br>

+    Value *Cmp = Builder.CreateICmp(Pred, Res,<br>

+                                    MaxVec); // 1 if no overflow.<br>

+    Res = Builder.CreateSelect(Cmp, Res,<br>

+                               MaxVec); // If overflowed, copy from max vec.<br>

+<br>

+    // Saturate underflow.<br>

+    if (IsSigned) {<br>

+      APInt MinInt = APInt::getSignedMinValue(EltSizeInBits);<br>

+      Value *MinVec = ConstantInt::get(ResultType, MinInt);<br>

+      // Extend so that it can be compared to result of add/sub.<br>

+      MinVec = Builder.CreateSExt(MinVec, ExtVT);<br>

+      Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Res,<br>

+                                      MinVec); // 1 if no underflow.<br>

+      Res = Builder.CreateSelect(Cmp, Res,<br>

+                                 MinVec); // If underflowed, copy from min vec.<br>

+    }<br>

+<br>

+    // Truncate to original type.<br>

+    Res = Builder.CreateTrunc(Res, ResultType);<br>

+  }<br>

+<br>

+  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.<br>

+    Value *VecSRC = CI.getArgOperand(2);<br>

+    Value *Mask = CI.getArgOperand(3);<br>

+    Res = EmitX86Select(Builder, Mask, Res, VecSRC);<br>

+  }<br>

+  return Res;<br>

+}<br>

+<br>

 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,<br>

                                  Value *Ptr, Value *Data, Value *Mask,<br>

                                  bool Aligned) {<br>

@@ -1684,6 +1767,26 @@ void llvm::UpgradeIntrinsicCall(CallInst<br>

                                         ShuffleMask);<br>

       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,<br>

                           CI->getArgOperand(1));<br>

+    } else if (IsX86 && (Name.startswith("sse2.padds") ||<br>

+                         Name.startswith("avx2.padds") ||<br>

+                         Name.startswith("avx512.mask.padds"))) {<br>

+      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,<br>

+                                          true, true); // Signed add.<br>

+    } else if (IsX86 && (Name.startswith("sse2.paddus") ||<br>

+                         Name.startswith("avx2.paddus") ||<br>

+                         Name.startswith("avx512.mask.paddus"))) {<br>

+      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,<br>

+                                          false, true); // Unsigned add.<br>

+    } else if (IsX86 && (Name.startswith("sse2.psubs") ||<br>

+                         Name.startswith("avx2.psubs") ||<br>

+                         Name.startswith("avx512.mask.psubs"))) {<br>

+      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,<br>

+                                          true, false); // Signed sub.<br>

+    } else if (IsX86 && (Name.startswith("sse2.psubus") ||<br>

+                         Name.startswith("avx2.psubus") ||<br>

+                         Name.startswith("avx512.mask.psubus"))) {<br>

+      Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,<br>

+                                          false, false); // Unsigned sub.<br>

     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||<br>

                          Name.startswith("avx2.vbroadcast") ||<br>

                          Name.startswith("avx512.pbroadcast") ||<br>

@@ -1694,7 +1797,6 @@ void llvm::UpgradeIntrinsicCall(CallInst<br>

       Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);<br>

       Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),<br>

                                         Constant::getNullValue(MaskTy));<br>

-<br>

       if (CI->getNumArgOperands() == 3)<br>

         Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,<br>

                             CI->getArgOperand(1));<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)<br>

+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Apr 19 05:13:30 2018<br>

@@ -35997,6 +35997,91 @@ static SDValue combineVectorSignBitsTrun<br>

   return SDValue();<br>

 }<br>

<br>

+/// This function detects the addition or subtraction with saturation pattern<br>

+/// between 2 i8/i16 vectors and replaces this operation with the<br>

+/// efficient X86ISD::ADDUS/X86ISD::ADDS/X86ISD::SUBUS/X86ISD::SUBS instruction.<br>

+static SDValue detectAddSubSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,<br>

+                                      const X86Subtarget &Subtarget,<br>

+                                      const SDLoc &DL) {<br>

+  if (!VT.isVector() || !VT.isSimple())<br>

+    return SDValue();<br>

+  EVT InVT = In.getValueType();<br>

+  unsigned NumElems = VT.getVectorNumElements();<br>

+<br>

+  EVT ScalarVT = VT.getVectorElementType();<br>

+  if ((ScalarVT != MVT::i8 && ScalarVT != MVT::i16) ||<br>

+      InVT.getSizeInBits() % 128 != 0 || !isPowerOf2_32(NumElems))<br>

+    return SDValue();<br>

+<br>

+  // InScalarVT is the intermediate type in AddSubSat pattern<br>

+  // and it should be greater than the original input type (i8/i16).<br>

+  EVT InScalarVT = InVT.getVectorElementType();<br>

+  if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())<br>

+    return SDValue();<br>

+<br>

+  if (!Subtarget.hasSSE2())<br>

+    return SDValue();<br>

+<br>

+  // Detect the following pattern:<br>

+  // %2 = zext <16 x i8> %0 to <16 x i16><br>

+  // %3 = zext <16 x i8> %1 to <16 x i16><br>

+  // %4 = add nuw nsw <16 x i16> %3, %2<br>

+  // %5 = icmp ult <16 x i16> %4, <16 x i16> (vector of max ScalarVT values)<br>

+  // %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> (vector of max ScalarVT values)<br>

+  // %7 = trunc <16 x i16> %6 to <16 x i8><br>

+<br>

+  // Detect a Sat Pattern<br>

+  bool Signed = true;<br>

+  SDValue Sat = detectSSatPattern(In, VT, false);<br>

+  if (!Sat) {<br>

+    Sat = detectUSatPattern(In, VT);<br>

+    Signed = false;<br>

+  }<br>

+  if (!Sat)<br>

+    return SDValue();<br>

+  if (Sat.getOpcode() != ISD::ADD && Sat.getOpcode() != ISD::SUB)<br>

+    return SDValue();<br>

+<br>

+  unsigned Opcode = Sat.getOpcode() == ISD::ADD ? Signed ? X86ISD::ADDS<br>

+                                                         : X86ISD::ADDUS<br>

+                                                : Signed ? X86ISD::SUBS<br>

+                                                         : X86ISD::SUBUS;<br>

+<br>

+  // Get addition elements.<br>

+  SDValue LHS = Sat.getOperand(0);<br>

+  SDValue RHS = Sat.getOperand(1);<br>

+<br>

+  // Check if LHS and RHS are results of type promotion or<br>

+  // one of them is and the other one is constant.<br>

+  unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND :<br>

+                                   ISD::ZERO_EXTEND;<br>

+  unsigned LHSOpcode = LHS.getOpcode();<br>

+  unsigned RHSOpcode = RHS.getOpcode();<br>

+<br>

+  if (LHSOpcode == ExtendOpcode && RHSOpcode == ExtendOpcode) {<br>

+    LHS = LHS.getOperand(0);<br>

+    RHS = RHS.getOperand(0);<br>

+  } else if (LHSOpcode == ExtendOpcode &&<br>

+             ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) {<br>

+    LHS = LHS.getOperand(0);<br>

+    RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);<br>

+  } else if (RHSOpcode == ExtendOpcode &&<br>

+           ISD::isBuildVectorOfConstantSDNodes(LHS.getNode())) {<br>

+    RHS = RHS.getOperand(0);<br>

+    LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS);<br>

+  } else<br>

+    return SDValue();<br>

+  <br>

+  // The pattern is detected, emit ADDS/ADDUS/SUBS/SUBUS instruction.<br>

+  auto AddSubSatBuilder = [Opcode](SelectionDAG &DAG, const SDLoc &DL,<br>

+                                   ArrayRef<SDValue> Ops) {<br>

+    EVT VT = Ops[0].getValueType();<br>

+    return DAG.getNode(Opcode, DL, VT, Ops);<br>

+  };<br>

+  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { LHS, RHS },<br>

+                          AddSubSatBuilder);<br>

+}<br>

+<br>

 static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,<br>

                                const X86Subtarget &Subtarget) {<br>

   EVT VT = N->getValueType(0);<br>

@@ -36011,6 +36096,10 @@ static SDValue combineTruncate(SDNode *N<br>

   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))<br>

     return Avg;<br>

<br>

+  // Try to detect addition or subtraction with saturation.<br>

+  if (SDValue AddSubSat = detectAddSubSatPattern(Src, VT, DAG, Subtarget, DL))<br>

+    return AddSubSat;<br>

+<br>

   // Try to combine truncation with signed/unsigned saturation.<br>

   if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))<br>

     return Val;<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)<br>

+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Thu Apr 19 05:13:30 2018<br>

@@ -402,10 +402,6 @@ static const IntrinsicData  IntrinsicsWi<br>

   X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),<br>

   X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),<br>

   X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),<br>

   X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),<br>

   X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),<br>

   X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),<br>

@@ -444,10 +440,6 @@ static const IntrinsicData  IntrinsicsWi<br>

   X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),<br>

   X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),<br>

   X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),<br>

-  X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),<br>

   X86_INTRINSIC_DATA(avx512_cvtsi2sd64,  INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),<br>

   X86_INTRINSIC_DATA(avx512_cvtsi2ss32,  INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),<br>

   X86_INTRINSIC_DATA(avx512_cvtsi2ss64,  INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),<br>

@@ -803,18 +795,6 @@ static const IntrinsicData  IntrinsicsWi<br>

                      X86ISD::FMULS_RND, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,<br>

                      X86ISD::FMULS_RND, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_permvar_df_256, VPERM_2OP_MASK,<br>

                      X86ISD::VPERMV, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_permvar_df_512, VPERM_2OP_MASK,<br>

@@ -981,18 +961,6 @@ static const IntrinsicData  IntrinsicsWi<br>

   X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,<br>

                      X86ISD::VPTERNLOG, 0),<br>

   X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,<br>

@@ -1602,10 +1570,6 @@ static const IntrinsicData  IntrinsicsWi<br>

   X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),<br>

   X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),<br>

   X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_padds_b,      INTR_TYPE_2OP, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_padds_w,      INTR_TYPE_2OP, X86ISD::ADDS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_paddus_b,     INTR_TYPE_2OP, X86ISD::ADDUS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_paddus_w,     INTR_TYPE_2OP, X86ISD::ADDUS, 0),<br>

   X86_INTRINSIC_DATA(sse2_pmadd_wd,     INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),<br>

   X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),<br>

   X86_INTRINSIC_DATA(sse2_pmulh_w,      INTR_TYPE_2OP, ISD::MULHS, 0),<br>

@@ -1627,10 +1591,6 @@ static const IntrinsicData  IntrinsicsWi<br>

   X86_INTRINSIC_DATA(sse2_psrli_d,      VSHIFT, X86ISD::VSRLI, 0),<br>

   X86_INTRINSIC_DATA(sse2_psrli_q,      VSHIFT, X86ISD::VSRLI, 0),<br>

   X86_INTRINSIC_DATA(sse2_psrli_w,      VSHIFT, X86ISD::VSRLI, 0),<br>

-  X86_INTRINSIC_DATA(sse2_psubs_b,      INTR_TYPE_2OP, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_psubs_w,      INTR_TYPE_2OP, X86ISD::SUBS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_psubus_b,     INTR_TYPE_2OP, X86ISD::SUBUS, 0),<br>

-  X86_INTRINSIC_DATA(sse2_psubus_w,     INTR_TYPE_2OP, X86ISD::SUBUS, 0),<br>

   X86_INTRINSIC_DATA(sse2_sqrt_pd,      INTR_TYPE_1OP, ISD::FSQRT, 0),<br>

   X86_INTRINSIC_DATA(sse2_ucomieq_sd,   COMI, X86ISD::UCOMI, ISD::SETEQ),<br>

   X86_INTRINSIC_DATA(sse2_ucomige_sd,   COMI, X86ISD::UCOMI, ISD::SETGE),<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll Thu Apr 19 05:13:30 2018<br>

@@ -98,11 +98,17 @@ define <4 x i64> @test_mm256_adds_epi8(<<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <32 x i8><br>

   %arg1 = bitcast <4 x i64> %a1 to <32 x i8><br>

-  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %arg0, <32 x i8> %arg1)<br>

-  %bc = bitcast <32 x i8> %res to <4 x i64><br>

+  %1 = sext <32 x i8> %arg0 to <32 x i16><br>

+  %2 = sext <32 x i8> %arg1 to <32 x i16><br>

+  %3 = add nsw <32 x i16> %1, %2<br>

+  %4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127><br>

+  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127><br>

+  %6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128><br>

+  %7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128><br>

+  %8 = trunc <32 x i16> %7 to <32 x i8><br>

+  %bc = bitcast <32 x i8> %8 to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_adds_epi16:<br>

@@ -111,11 +117,17 @@ define <4 x i64> @test_mm256_adds_epi16(<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <16 x i16><br>

   %arg1 = bitcast <4 x i64> %a1 to <16 x i16><br>

-  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %arg0, <16 x i16> %arg1)<br>

-  %bc = bitcast <16 x i16> %res to <4 x i64><br>

+  %1 = sext <16 x i16> %arg0 to <16 x i32><br>

+  %2 = sext <16 x i16> %arg1 to <16 x i32><br>

+  %3 = add nsw <16 x i32> %1, %2<br>

+  %4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767><br>

+  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767><br>

+  %6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768><br>

+  %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768><br>

+  %8 = trunc <16 x i32> %7 to <16 x i16><br>

+  %bc = bitcast <16 x i16> %8 to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_adds_epu8(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_adds_epu8:<br>

@@ -124,11 +136,15 @@ define <4 x i64> @test_mm256_adds_epu8(<<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <32 x i8><br>

   %arg1 = bitcast <4 x i64> %a1 to <32 x i8><br>

-  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %arg0, <32 x i8> %arg1)<br>

-  %bc = bitcast <32 x i8> %res to <4 x i64><br>

+  %1 = zext <32 x i8> %arg0 to <32 x i16><br>

+  %2 = zext <32 x i8> %arg1 to <32 x i16><br>

+  %3 = add nsw <32 x i16> %1, %2<br>

+  %4 = icmp ult <32 x i16> %3, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255><br>

+  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255><br>

+  %6 = trunc <32 x i16> %5 to <32 x i8><br>

+  %bc = bitcast <32 x i8> %6 to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_adds_epu16(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_adds_epu16:<br>

@@ -137,11 +153,15 @@ define <4 x i64> @test_mm256_adds_epu16(<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <16 x i16><br>

   %arg1 = bitcast <4 x i64> %a1 to <16 x i16><br>

-  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %arg0, <16 x i16> %arg1)<br>

-  %bc = bitcast <16 x i16> %res to <4 x i64><br>

+  %1 = zext <16 x i16> %arg0 to <16 x i32><br>

+  %2 = zext <16 x i16> %arg1 to <16 x i32><br>

+  %3 = add nsw <16 x i32> %1, %2<br>

+  %4 = icmp ult <16 x i32> %3, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535><br>

+  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535><br>

+  %6 = trunc <16 x i32> %5 to <16 x i16><br>

+  %bc = bitcast <16 x i16> %6 to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_alignr_epi8:<br>

@@ -2529,11 +2549,17 @@ define <4 x i64> @test_mm256_subs_epi8(<<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <32 x i8><br>

   %arg1 = bitcast <4 x i64> %a1 to <32 x i8><br>

-  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %arg0, <32 x i8> %arg1)<br>

-  %bc = bitcast <32 x i8> %res to <4 x i64><br>

+  %1 = sext <32 x i8> %arg0 to <32 x i16><br>

+  %2 = sext <32 x i8> %arg1 to <32 x i16><br>

+  %3 = sub nsw <32 x i16> %1, %2<br>

+  %4 = icmp slt <32 x i16> %3, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127><br>

+  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127><br>

+  %6 = icmp sgt <32 x i16> %5, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128><br>

+  %7 = select <32 x i1> %6, <32 x i16> %5, <32 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128><br>

+  %8 = trunc <32 x i16> %7 to <32 x i8><br>

+  %bc = bitcast <32 x i8> %8 to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_subs_epi16:<br>

@@ -2542,37 +2568,47 @@ define <4 x i64> @test_mm256_subs_epi16(<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <16 x i16><br>

   %arg1 = bitcast <4 x i64> %a1 to <16 x i16><br>

-  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %arg0, <16 x i16> %arg1)<br>

-  %bc = bitcast <16 x i16> %res to <4 x i64><br>

+  %1 = sext <16 x i16> %arg0 to <16 x i32><br>

+  %2 = sext <16 x i16> %arg1 to <16 x i32><br>

+  %3 = sub nsw <16 x i32> %1, %2<br>

+  %4 = icmp slt <16 x i32> %3, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767><br>

+  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767><br>

+  %6 = icmp sgt <16 x i32> %5, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768><br>

+  %7 = select <16 x i1> %6, <16 x i32> %5, <16 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768><br>

+  %8 = trunc <16 x i32> %7 to <16 x i16><br>

+  %bc = bitcast <16 x i16> %8 to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_subs_epu8(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_subs_epu8:<br>

 ; CHECK:       # %bb.0:<br>

-; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0<br>

+; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0<br>

+; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <32 x i8><br>

   %arg1 = bitcast <4 x i64> %a1 to <32 x i8><br>

-  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %arg0, <32 x i8> %arg1)<br>

-  %bc = bitcast <32 x i8> %res to <4 x i64><br>

+  %cmp = icmp ugt <32 x i8> %arg0, %arg1<br>

+  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1<br>

+  %sub = sub <32 x i8> %sel, %arg1<br>

+  %bc = bitcast <32 x i8> %sub to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_subs_epu16(<4 x i64> %a0, <4 x i64> %a1) {<br>

 ; CHECK-LABEL: test_mm256_subs_epu16:<br>

 ; CHECK:       # %bb.0:<br>

-; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0<br>

+; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0<br>

+; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0<br>

 ; CHECK-NEXT:    ret{{[l|q]}}<br>

   %arg0 = bitcast <4 x i64> %a0 to <16 x i16><br>

   %arg1 = bitcast <4 x i64> %a1 to <16 x i16><br>

-  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %arg0, <16 x i16> %arg1)<br>

-  %bc = bitcast <16 x i16> %res to <4 x i64><br>

+  %cmp = icmp ugt <16 x i16> %arg0, %arg1<br>

+  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1<br>

+  %sub = sub <16 x i16> %sel, %arg1<br>

+  %bc = bitcast <16 x i16> %sub to <4 x i64><br>

   ret <4 x i64> %bc<br>

 }<br>

-declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

<br>

 define <4 x i64> @test_mm256_unpackhi_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {<br>

 ; CHECK-LABEL: test_mm256_unpackhi_epi8:<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll Thu Apr 19 05:13:30 2018<br>

@@ -848,6 +848,133 @@ define <4 x i64> @test_x86_avx2_vperm2i1<br>

 declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly<br>

<br>

<br>

+define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_padds_b:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_padds_b:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

+  ret <32 x i8> %res<br>

+}<br>

+declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

+<br>

+<br>

+define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_padds_w:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_padds_w:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

+  ret <16 x i16> %res<br>

+}<br>

+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

+<br>

+<br>

+define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_paddus_b:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_paddus_b:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

+  ret <32 x i8> %res<br>

+}<br>

+declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

+<br>

+<br>

+define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_paddus_w:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_paddus_w:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

+  ret <16 x i16> %res<br>

+}<br>

+declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

+<br>

+<br>

+define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_psubs_b:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_psubs_b:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

+  ret <32 x i8> %res<br>

+}<br>

+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

+<br>

+<br>

+define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_psubs_w:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_psubs_w:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

+  ret <16 x i16> %res<br>

+}<br>

+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

+<br>

+<br>

+define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_psubus_b:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_psubus_b:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

+  ret <32 x i8> %res<br>

+}<br>

+declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

+<br>

+<br>

+define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

+; AVX2-LABEL: test_x86_avx2_psubus_w:<br>

+; AVX2:       ## %bb.0:<br>

+; AVX2-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0<br>

+; AVX2-NEXT:    ret{{[l|q]}}<br>

+;<br>

+; AVX512VL-LABEL: test_x86_avx2_psubus_w:<br>

+; AVX512VL:       ## %bb.0:<br>

+; AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]<br>

+; AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

+  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

+  ret <16 x i16> %res<br>

+}<br>

+declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

+<br>

 define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {<br>

 ; X86-LABEL: test_x86_avx2_pmulu_dq:<br>

 ; X86:       ## %bb.0:<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll Thu Apr 19 05:13:30 2018<br>

@@ -181,110 +181,6 @@ define <32 x i8> @test_x86_avx2_packuswb<br>

 }<br>

<br>

<br>

-define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_padds_b:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_padds_b:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_padds_b:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_padds_b:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

-  ret <32 x i8> %res<br>

-}<br>

-declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

-<br>

-<br>

-define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_padds_w:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_padds_w:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_padds_w:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_padds_w:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

-  ret <16 x i16> %res<br>

-}<br>

-declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

-<br>

-<br>

-define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_paddus_b:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_paddus_b:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_paddus_b:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_paddus_b:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

-  ret <32 x i8> %res<br>

-}<br>

-declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

-<br>

-<br>

-define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_paddus_w:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_paddus_w:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_paddus_w:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_paddus_w:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

-  ret <16 x i16> %res<br>

-}<br>

-declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

-<br>

-<br>

 define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {<br>

 ; X86-AVX-LABEL: test_x86_avx2_pmadd_wd:<br>

 ; X86-AVX:       ## %bb.0:<br>

@@ -927,109 +823,6 @@ define <16 x i16> @test_x86_avx2_psrli_w<br>

 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone<br>

<br>

<br>

-define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_psubs_b:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_psubs_b:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_psubs_b:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_psubs_b:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

-  ret <32 x i8> %res<br>

-}<br>

-declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

-<br>

-<br>

-define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_psubs_w:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_psubs_w:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_psubs_w:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_psubs_w:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

-  ret <16 x i16> %res<br>

-}<br>

-declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

-<br>

-<br>

-define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_psubus_b:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_psubus_b:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_psubus_b:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_psubus_b:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]<br>

-  ret <32 x i8> %res<br>

-}<br>

-declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone<br>

-<br>

-<br>

-define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {<br>

-; X86-AVX-LABEL: test_x86_avx2_psubus_w:<br>

-; X86-AVX:       ## %bb.0:<br>

-; X86-AVX-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X86-AVX512VL-LABEL: test_x86_avx2_psubus_w:<br>

-; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX-LABEL: test_x86_avx2_psubus_w:<br>

-; X64-AVX:       ## %bb.0:<br>

-; X64-AVX-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

-;<br>

-; X64-AVX512VL-LABEL: test_x86_avx2_psubus_w:<br>

-; X64-AVX512VL:       ## %bb.0:<br>

-; X64-AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

-  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]<br>

-  ret <16 x i16> %res<br>

-}<br>

-declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone<br>

-<br>

 define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {<br>

 ; X86-LABEL: test_x86_avx2_phadd_d:<br>

 ; X86:       ## %bb.0:<br>

@@ -1330,29 +1123,29 @@ define <16 x i16> @test_x86_avx2_packusd<br>

 ; X86-AVX:       ## %bb.0:<br>

 ; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]<br>

 ; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]<br>

-; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

+; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI46_0, kind: FK_Data_4<br>

+; X86-AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:<br>

 ; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vmovaps LCPI54_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]<br>

+; X86-AVX512VL-NEXT:    vmovaps LCPI46_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]<br>

 ; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]<br>

-; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

+; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI46_0, kind: FK_Data_4<br>

+; X86-AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:<br>

 ; X64-AVX:       ## %bb.0:<br>

 ; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]<br>

 ; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]<br>

-; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

+; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI46_0-4, kind: reloc_riprel_4byte<br>

+; X64-AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:<br>

 ; X64-AVX512VL:       ## %bb.0:<br>

 ; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]<br>

 ; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]<br>

-; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

+; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI46_0-4, kind: reloc_riprel_4byte<br>

+; X64-AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

   %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)<br>

   ret <16 x i16> %res<br>

 }<br>

@@ -2071,37 +1864,37 @@ define <4 x i32> @test_x86_avx2_psrav_d_<br>

 ; X86-AVX:       ## %bb.0:<br>

 ; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]<br>

 ; X86-AVX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]<br>

-; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4<br>

-; X86-AVX-NEXT:    vpsravd LCPI86_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]<br>

-; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

+; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI78_0, kind: FK_Data_4<br>

+; X86-AVX-NEXT:    vpsravd LCPI78_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]<br>

+; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI78_1, kind: FK_Data_4<br>

+; X86-AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:<br>

 ; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vmovdqa LCPI86_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]<br>

+; X86-AVX512VL-NEXT:    vmovdqa LCPI78_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]<br>

 ; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]<br>

-; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4<br>

-; X86-AVX512VL-NEXT:    vpsravd LCPI86_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]<br>

-; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

+; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI78_0, kind: FK_Data_4<br>

+; X86-AVX512VL-NEXT:    vpsravd LCPI78_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]<br>

+; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI78_1, kind: FK_Data_4<br>

+; X86-AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:<br>

 ; X64-AVX:       ## %bb.0:<br>

 ; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]<br>

 ; X64-AVX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]<br>

-; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte<br>

+; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI78_0-4, kind: reloc_riprel_4byte<br>

 ; X64-AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]<br>

-; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

+; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI78_1-4, kind: reloc_riprel_4byte<br>

+; X64-AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:<br>

 ; X64-AVX512VL:       ## %bb.0:<br>

 ; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]<br>

 ; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]<br>

-; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte<br>

+; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI78_0-4, kind: reloc_riprel_4byte<br>

 ; X64-AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]<br>

-; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

+; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI78_1-4, kind: reloc_riprel_4byte<br>

+; X64-AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

   %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)<br>

   ret <4 x i32> %res<br>

 }<br>

@@ -2136,37 +1929,37 @@ define <8 x i32> @test_x86_avx2_psrav_d_<br>

 ; X86-AVX:       ## %bb.0:<br>

 ; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]<br>

 ; X86-AVX-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]<br>

-; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4<br>

-; X86-AVX-NEXT:    vpsravd LCPI88_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]<br>

-; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4<br>

-; X86-AVX-NEXT:    retl ## encoding: [0xc3]<br>

+; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI80_0, kind: FK_Data_4<br>

+; X86-AVX-NEXT:    vpsravd LCPI80_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]<br>

+; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI80_1, kind: FK_Data_4<br>

+; X86-AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:<br>

 ; X86-AVX512VL:       ## %bb.0:<br>

-; X86-AVX512VL-NEXT:    vmovdqa LCPI88_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]<br>

+; X86-AVX512VL-NEXT:    vmovdqa LCPI80_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]<br>

 ; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]<br>

-; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4<br>

-; X86-AVX512VL-NEXT:    vpsravd LCPI88_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]<br>

-; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4<br>

-; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]<br>

+; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI80_0, kind: FK_Data_4<br>

+; X86-AVX512VL-NEXT:    vpsravd LCPI80_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]<br>

+; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI80_1, kind: FK_Data_4<br>

+; X86-AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:<br>

 ; X64-AVX:       ## %bb.0:<br>

 ; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]<br>

 ; X64-AVX-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]<br>

-; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte<br>

+; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI80_0-4, kind: reloc_riprel_4byte<br>

 ; X64-AVX-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]<br>

-; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte<br>

-; X64-AVX-NEXT:    retq ## encoding: [0xc3]<br>

+; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI80_1-4, kind: reloc_riprel_4byte<br>

+; X64-AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

 ;<br>

 ; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:<br>

 ; X64-AVX512VL:       ## %bb.0:<br>

 ; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]<br>

 ; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]<br>

-; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte<br>

+; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI80_0-4, kind: reloc_riprel_4byte<br>

 ; X64-AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]<br>

-; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte<br>

-; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]<br>

+; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI80_1-4, kind: reloc_riprel_4byte<br>

+; X64-AVX512VL-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]<br>

   %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)<br>

   ret <8 x i32> %res<br>

 }<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=330322&r1=330321&r2=330322&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=330322&r1=330321&r2=330322&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Thu Apr 19 05:13:30 2018<br>

@@ -2694,6 +2694,422 @@ define <32 x i16>@test_int_x86_avx512_ma<br>

   ret <32 x i16> %res2<br>

 }<br>

<br>

+define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {<br>

+; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:<br>

+; AVX512BW:       ## %bb.0:<br>

+; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0<br>

+; AVX512BW-NEXT:    retq<br>

+;<br>

+; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:<br>

+; AVX512F-32:       # %bb.0:<br>

+; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0<br>

+; AVX512F-32-NEXT:    retl<br>

+  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)<br>

+  ret <32 x i16> %res<br>

+}<br>

+<br>

+define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {<br>

+; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:<br>

+; AVX512BW:       ## %bb.0:<br>

+; AVX512BW-NEXT:    kmovd %edi, %k1<br>

+; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}<br>

+; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0<br>

+; AVX512BW-NEXT:    retq<br>

+;<br>

+; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:<br>

+; AVX512F-32:       # %bb.0:<br>

+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1<br>

+; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}<br>

+; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0<br>

+; AVX512F-32-NEXT:    retl<br>

+  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)<br>

+  ret <32 x i16> %res<br>

+}<br>

+<br>

+define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {<br>

+; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:<br>

+; AVX512BW:       ## %bb.0:<br>

+; AVX512BW-NEXT:    kmovd %edi, %k1<br>

+; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}<br>

+; AVX512BW-NEXT:    retq<br>

+;<br>

+; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:<br>

+; AVX512F-32:       # %bb.0:<br>

+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1<br>

+; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}<br>

+; AVX512F-32-NEXT:    retl<br>

+  %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)<br>

+  ret <32 x i16> %res<br>

+}<br>

+<br>

+define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {<br>

+; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:<br>

+; AVX512BW:       ## %bb.0:<br>

+; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0<br>

+; AVX512BW-NEXT:    retq<br>

+;<br>

+; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:<br>

+; AVX512F-32:       # %bb.0:<br>

+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax<br>

+; AVX512F-32-NEXT:</blockquote></div>