[llvm] r258659 - Added Skylake client to X86 targets and features

Elena Demikhovsky via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 24 02:41:29 PST 2016


Author: delena
Date: Sun Jan 24 04:41:28 2016
New Revision: 258659

URL: http://llvm.org/viewvc/llvm-project?rev=258659&view=rev
Log:
Added Skylake client to X86 targets and features

Changes in X86.td:

I set features of Intel processors in incremental form: IVB = SNB + X HSW = IVB + X ..
I added Skylake client processor and defined it's features
FeatureADX was missing on KNL
Added some new features to appropriate processors SMAP, IFMA, PREFETCHWT1, VMFUNC and others

Differential Revision: http://reviews.llvm.org/D16357


Modified:
    llvm/trunk/lib/Support/Host.cpp
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.h
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll

Modified: llvm/trunk/lib/Support/Host.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Host.cpp?rev=258659&r1=258658&r2=258659&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Host.cpp (original)
+++ llvm/trunk/lib/Support/Host.cpp Sun Jan 24 04:41:28 2016
@@ -805,25 +805,34 @@ bool sys::getHostCPUFeatures(StringMap<b
   Features["avx2"]     = HasAVXSave && HasLeaf7 && ((EBX >>  5) & 1);
 
   Features["fsgsbase"] = HasLeaf7 && ((EBX >>  0) & 1);
+  Features["sgx"]      = HasLeaf7 && ((EBX >>  2) & 1);
   Features["bmi"]      = HasLeaf7 && ((EBX >>  3) & 1);
   Features["hle"]      = HasLeaf7 && ((EBX >>  4) & 1);
   Features["bmi2"]     = HasLeaf7 && ((EBX >>  8) & 1);
+  Features["invpcid"]  = HasLeaf7 && ((EBX >> 10) & 1);
   Features["rtm"]      = HasLeaf7 && ((EBX >> 11) & 1);
   Features["rdseed"]   = HasLeaf7 && ((EBX >> 18) & 1);
   Features["adx"]      = HasLeaf7 && ((EBX >> 19) & 1);
+  Features["smap"]     = HasLeaf7 && ((EBX >> 20) & 1);
+  Features["pcommit"]  = HasLeaf7 && ((EBX >> 22) & 1);
+  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
+  Features["clwb"]     = HasLeaf7 && ((EBX >> 24) & 1);
   Features["sha"]      = HasLeaf7 && ((EBX >> 29) & 1);
-  // Enable protection keys
-  Features["pku"]    = HasLeaf7 && ((ECX >> 4) & 1);
 
   // AVX512 is only supported if the OS supports the context save for it.
   Features["avx512f"]  = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
   Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
+  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
   Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
   Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
   Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
   Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
   Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
-  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
+
+  Features["prefetchwt1"] = HasLeaf7 && (ECX & 1);
+  Features["avx512vbmi"]  = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
+  // Enable protection keys
+  Features["pku"]         = HasLeaf7 && ((ECX >> 4) & 1);
 
   bool HasLeafD = MaxLevel >= 0xd &&
     !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=258659&r1=258658&r2=258659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Sun Jan 24 04:41:28 2016
@@ -125,6 +125,9 @@ def FeatureCDI      : SubtargetFeature<"
 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                                       [FeatureAVX512]>;
+def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
+                                   "true",
+                                   "Prefetch with Intent to Write and T1 Hint">;
 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                                       [FeatureAVX512]>;
@@ -137,6 +140,9 @@ def FeatureVLX     : SubtargetFeature<"a
 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Bit Manipulation Instructions",
                                       [FeatureAVX512]>;
+def FeatureIFMA     : SubtargetFeature<"ifma", "HasIFMA", "true",
+                      "Enable AVX-512 Integer Fused Multiple-Add",
+                                      [FeatureAVX512]>;
 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
@@ -202,6 +208,20 @@ def FeatureSlowDivide64 : SubtargetFeatu
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
+def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
+                                      "Invalidate Process-Context Identifier">;
+def FeatureVMFUNC  : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
+                                      "VM Functions">;
+def FeatureSMAP    : SubtargetFeature<"smap", "HasSMAP", "true",
+                                      "Supervisor Mode Access Protection">;
+def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
+                                      "Enable Software Guard Extensions">;
+def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
+                                      "Flush A Cache Line Optimized">;
+def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
+                                      "Enable Persistent Commit">;
+def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
+                                      "Cache Line Write Back">;
 // TODO: This feature ought to be renamed.
 // What it really refers to are CPUs for which certain instructions
 // (which ones besides the example below?) are microcoded.
@@ -365,13 +385,12 @@ def : WestmereProc<"westmere">;
 
 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 // rather than a superset.
-class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+def ProcIntelSNB : SubtargetFeature<"snb", "X86ProcFamily", "IntelSNB",
+                                    " Intel SandyBridge Processor", [
   FeatureMMX,
   FeatureAVX,
   FeatureFXSR,
   FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
-  FeatureSlowUAMem32,
   FeaturePOPCNT,
   FeatureAES,
   FeaturePCLMUL,
@@ -379,187 +398,125 @@ class SandyBridgeProc<string Name> : Pro
   FeatureXSAVEOPT,
   FeatureLAHFSAHF
 ]>;
+
+class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+  ProcIntelSNB,
+  FeatureSlowBTMem,
+  FeatureSlowUAMem32
+]>;
 def : SandyBridgeProc<"sandybridge">;
 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
 
-class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
-  FeatureMMX,
-  FeatureAVX,
-  FeatureFXSR,
-  FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
-  FeatureSlowUAMem32,
-  FeaturePOPCNT,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
+def ProcIntelIVB : SubtargetFeature<"ivb", "X86ProcFamily", "IntelIVB",
+                                    " Intel IvyBridge Processor", [
+  ProcIntelSNB,
   FeatureRDRAND,
   FeatureF16C,
-  FeatureFSGSBase,
-  FeatureLAHFSAHF
+  FeatureFSGSBase
+]>;
+
+class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+  ProcIntelIVB,
+  FeatureSlowBTMem,
+  FeatureSlowUAMem32
 ]>;
 def : IvyBridgeProc<"ivybridge">;
 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
 
-class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
-  FeatureMMX,
+def ProcIntelHSW : SubtargetFeature<"hsw", "X86ProcFamily", "IntelHSW",
+                                    " Intel Haswell Processor", [
+  ProcIntelIVB,
   FeatureAVX2,
-  FeatureFXSR,
-  FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
-  FeaturePOPCNT,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureRDRAND,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureF16C,
-  FeatureFSGSBase,
-  FeatureMOVBE,
-  FeatureLZCNT,
   FeatureBMI,
   FeatureBMI2,
   FeatureFMA,
+  FeatureLZCNT,
+  FeatureMOVBE,
+  FeatureINVPCID,
+  FeatureVMFUNC,
   FeatureRTM,
   FeatureHLE,
-  FeatureSlowIncDec,
-  FeatureLAHFSAHF
+  FeatureSlowIncDec
 ]>;
+
+class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel,
+                                 [ProcIntelHSW]>;
 def : HaswellProc<"haswell">;
 def : HaswellProc<"core-avx2">; // Legacy alias.
 
-class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
-  FeatureMMX,
-  FeatureAVX2,
-  FeatureFXSR,
-  FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
-  FeaturePOPCNT,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureRDRAND,
-  FeatureF16C,
-  FeatureFSGSBase,
-  FeatureMOVBE,
-  FeatureLZCNT,
-  FeatureBMI,
-  FeatureBMI2,
-  FeatureFMA,
-  FeatureRTM,
-  FeatureHLE,
+def ProcIntelBDW : SubtargetFeature<"bdw", "X86ProcFamily", "IntelBDW",
+                                    " Intel Broadwell Processor", [
+  ProcIntelHSW,
   FeatureADX,
   FeatureRDSEED,
-  FeatureSlowIncDec,
-  FeatureLAHFSAHF
+  FeatureSMAP
 ]>;
+class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel,
+                                   [ProcIntelBDW]>;
 def : BroadwellProc<"broadwell">;
 
+def ProcIntelSKL : SubtargetFeature<"skl", "X86ProcFamily", "IntelSKL",
+                                    " Intel Skylake Client Processor", [
+  ProcIntelBDW,
+  FeatureMPX,
+  FeatureXSAVEC,
+  FeatureXSAVES,
+  FeatureSGX,
+  FeatureCLFLUSHOPT
+]>;
+
+// FIXME: define SKL model
+class SkylakeClientProc<string Name> : ProcessorModel<Name, HaswellModel,
+                                       [ProcIntelSKL]>;
+def : SkylakeClientProc<"skl">;
+
 // FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
-  FeatureMMX,
+class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,[
+  ProcIntelIVB,
   FeatureAVX512,
-  FeatureFXSR,
   FeatureERI,
   FeatureCDI,
   FeaturePFI,
-  FeatureCMPXCHG16B,
-  FeaturePOPCNT,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureRDRAND,
-  FeatureF16C,
-  FeatureFSGSBase,
+  FeaturePREFETCHWT1,
+  FeatureADX,
+  FeatureRDSEED,
   FeatureMOVBE,
   FeatureLZCNT,
   FeatureBMI,
   FeatureBMI2,
-  FeatureFMA,
-  FeatureRTM,
-  FeatureHLE,
-  FeatureSlowIncDec,
-  FeatureMPX,
-  FeatureLAHFSAHF
+  FeatureFMA
 ]>;
 def : KnightsLandingProc<"knl">;
 
-// FIXME: define SKX model
-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
-  FeatureMMX,
+def ProcIntelSKX : SubtargetFeature<"skx", "X86ProcFamily", "IntelSKX",
+                                    " Intel Skylake Server Processor", [
+  ProcIntelSKL,
   FeatureAVX512,
-  FeatureFXSR,
   FeatureCDI,
   FeatureDQI,
   FeatureBWI,
   FeatureVLX,
   FeaturePKU,
-  FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
-  FeaturePOPCNT,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureRDRAND,
-  FeatureF16C,
-  FeatureFSGSBase,
-  FeatureMOVBE,
-  FeatureLZCNT,
-  FeatureBMI,
-  FeatureBMI2,
-  FeatureFMA,
-  FeatureRTM,
-  FeatureHLE,
-  FeatureADX,
-  FeatureRDSEED,
-  FeatureSlowIncDec,
-  FeatureMPX,
-  FeatureXSAVEC,
-  FeatureXSAVES,
-  FeatureLAHFSAHF
+  FeaturePCOMMIT,
+  FeatureCLWB
 ]>;
-def : SkylakeProc<"skylake">;
-def : SkylakeProc<"skx">; // Legacy alias.
 
-class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
-  FeatureMMX,
-  FeatureAVX512,
-  FeatureFXSR,
-  FeatureCDI,
-  FeatureDQI,
-  FeatureBWI,
-  FeatureVLX,
-  FeaturePKU,
-  FeatureCMPXCHG16B,
-  FeatureSlowBTMem,
-  FeaturePOPCNT,
-  FeatureAES,
-  FeaturePCLMUL,
-  FeatureXSAVE,
-  FeatureXSAVEOPT,
-  FeatureRDRAND,
-  FeatureF16C,
-  FeatureFSGSBase,
-  FeatureMOVBE,
-  FeatureLZCNT,
-  FeatureBMI,
-  FeatureBMI2,
+// FIXME: define SKX model
+class SkylakeServerProc<string Name> : ProcessorModel<Name, HaswellModel,
+                                       [ ProcIntelSKX]>;
+def : SkylakeServerProc<"skylake">;
+def : SkylakeServerProc<"skx">; // Legacy alias.
+
+def ProcIntelCNL : SubtargetFeature<"cnl", "X86ProcFamily", "IntelCNL",
+                                    " Intel Cannonlake Processor", [
+  ProcIntelSKX,
   FeatureVBMI,
-  FeatureFMA,
-  FeatureRTM,
-  FeatureHLE,
-  FeatureADX,
-  FeatureRDSEED,
-  FeatureSlowIncDec,
-  FeatureMPX,
-  FeatureXSAVEC,
-  FeatureXSAVES,
-  FeatureLAHFSAHF
+  FeatureIFMA,
+  FeatureSHA
 ]>;
+
+class CannonlakeProc<string Name> : ProcessorModel<Name, HaswellModel,
+                                    [ ProcIntelCNL ]>;
 def : CannonlakeProc<"cannonlake">;
 def : CannonlakeProc<"cnl">;
 

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=258659&r1=258658&r2=258659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Sun Jan 24 04:41:28 2016
@@ -797,6 +797,8 @@ def HasBMI       : Predicate<"Subtarget-
 def HasBMI2      : Predicate<"Subtarget->hasBMI2()">;
 def HasVBMI      : Predicate<"Subtarget->hasVBMI()">,
                      AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
+def HasIFMA      : Predicate<"Subtarget->hasIFMA()">,
+                     AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
 def HasRTM       : Predicate<"Subtarget->hasRTM()">;
 def HasHLE       : Predicate<"Subtarget->hasHLE()">;
 def HasTSX       : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=258659&r1=258658&r2=258659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Sun Jan 24 04:41:28 2016
@@ -262,6 +262,7 @@ void X86Subtarget::initializeEnvironment
   HasBMI = false;
   HasBMI2 = false;
   HasVBMI = false;
+  HasIFMA = false;
   HasRTM = false;
   HasHLE = false;
   HasERI = false;

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=258659&r1=258658&r2=258659&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sun Jan 24 04:41:28 2016
@@ -55,7 +55,8 @@ protected:
   };
 
   enum X86ProcFamilyEnum {
-    Others, IntelAtom, IntelSLM
+    Others, IntelAtom, IntelSLM, IntelSNB, IntelIVB, IntelHSW, IntelBDW,
+    IntelKNL, IntelSKL, IntelSKX, IntelCNL
   };
 
   /// X86 processor family: Intel Atom, and others
@@ -137,6 +138,9 @@ protected:
   /// Processor has VBMI instructions.
   bool HasVBMI;
 
+  /// Processor has Integer Fused Multiply Add
+  bool HasIFMA;
+
   /// Processor has RTM instructions.
   bool HasRTM;
 
@@ -158,6 +162,9 @@ protected:
   /// Processor has LAHF/SAHF instructions.
   bool HasLAHFSAHF;
 
+  /// Processor has Prefetch with intent to Write instruction
+  bool HasPFPREFETCHWT1;
+
   /// True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
@@ -229,9 +236,30 @@ protected:
   /// Processor has PKU extenstions
   bool HasPKU;
 
-  /// Processot supports MPX - Memory Protection Extensions
+  /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX;
 
+  /// Processor supports Invalidate Process-Context Identifier
+  bool HasInvPCId;
+
+  /// Processor has VM Functions
+  bool HasVMFUNC;
+
+  /// Processor has Supervisor Mode Access Protection
+  bool HasSMAP;
+
+  /// Processor has Software Guard Extensions
+  bool HasSGX;
+
+  /// Processor supports Flush Cache Line instruction
+  bool HasCLFLUSHOPT;
+
+  /// Processor has Persistent Commit feature
+  bool HasPCOMMIT;
+
+  /// Processor supports Cache Line Write Back instruction
+  bool HasCLWB;
+
   /// Use software floating point for code generation.
   bool UseSoftFloat;
 
@@ -378,6 +406,7 @@ public:
   bool hasBMI() const { return HasBMI; }
   bool hasBMI2() const { return HasBMI2; }
   bool hasVBMI() const { return HasVBMI; }
+  bool hasIFMA() const { return HasIFMA; }
   bool hasRTM() const { return HasRTM; }
   bool hasHLE() const { return HasHLE; }
   bool hasADX() const { return HasADX; }

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=258659&r1=258658&r2=258659&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Sun Jan 24 04:41:28 2016
@@ -214,31 +214,31 @@ define i64 @test_cmp_b_512(<64 x i8> %a0
 ; AVX512F-32-NEXT:    vpcmpltb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpordb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, (%esp)
 ; AVX512F-32-NEXT:    addl (%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    addl $68, %esp
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@@ -303,31 +303,31 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpcmpltb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpordb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    addl $68, %esp
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -390,31 +390,31 @@ define i64 @test_ucmp_b_512(<64 x i8> %a
 ; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpordub %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, (%esp)
 ; AVX512F-32-NEXT:    addl (%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    addl $68, %esp
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
@@ -479,31 +479,31 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    vpcmpordub %zmm1, %zmm0, %k0 {%k1}
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    addl $68, %esp
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -2879,6 +2879,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
@@ -2899,6 +2909,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpsrlw $3, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
@@ -2919,6 +2939,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -2939,6 +2969,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpsraw %xmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -2959,6 +2999,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpsraw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpsraw $3, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@@ -2979,6 +3029,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpshufhw $3, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@@ -2992,13 +3052,23 @@ declare <32 x i16> @llvm.x86.avx512.mask
 define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    kmovd %esi, %k1 
-; AVX512BW-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} 
-; AVX512BW-NEXT:    vpshuflw $3, %zmm0, %zmm2 {%k1} {z} 
-; AVX512BW-NEXT:    vpshuflw $3, %zmm0, %zmm0 
-; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 
-; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 
-; AVX512BW-NEXT:    retq 
+; AVX512BW-NEXT:    kmovd %esi, %k1
+; AVX512BW-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT:    vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT:    vpshuflw $3, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpshuflw $3, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@@ -3019,6 +3089,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpsravw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -3039,6 +3119,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -3059,6 +3149,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpsllw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpsllw $3, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@@ -3079,6 +3179,16 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -3152,13 +3262,23 @@ declare <32 x i16> @llvm.x86.avx512.mask
 define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    kmovd %edi, %k1 
-; AVX512BW-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1} 
-; AVX512BW-NEXT:    vpmovzxbw %ymm0, %zmm2 {%k1} {z} 
-; AVX512BW-NEXT:    vpmovzxbw %ymm0, %zmm0 
-; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 
-; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 
-; AVX512BW-NEXT:    retq 
+; AVX512BW-NEXT:    kmovd %edi, %k1
+; AVX512BW-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1}
+; AVX512BW-NEXT:    vpmovzxbw %ymm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT:    vpmovzxbw %ymm0, %zmm0
+; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpmovzxbw %ymm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpmovzxbw %ymm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
@@ -3172,13 +3292,23 @@ declare <32 x i16> @llvm.x86.avx512.mask
 define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    kmovd %edi, %k1 
-; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1} 
-; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm2 {%k1} {z} 
-; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0 
-; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1 
-; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 
-; AVX512BW-NEXT:    retq 
+; AVX512BW-NEXT:    kmovd %edi, %k1
+; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1}
+; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1}
+; AVX512F-32-NEXT:    vpmovsxbw %ymm0, %zmm2 {%k1} {z}
+; AVX512F-32-NEXT:    vpmovsxbw %ymm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm2, %zmm1, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
@@ -3192,13 +3322,23 @@ declare <32 x i16> @llvm.x86.avx512.mask
 define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    kmovd %edi, %k1 
-; AVX512BW-NEXT:    vpermw %zmm1, %zmm0, %zmm2 {%k1} 
-; AVX512BW-NEXT:    vpermw %zmm1, %zmm0, %zmm3 {%k1} {z} 
-; AVX512BW-NEXT:    vpermw %zmm1, %zmm0, %zmm0 
-; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1 
-; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 
-; AVX512BW-NEXT:    retq 
+; AVX512BW-NEXT:    kmovd %edi, %k1
+; AVX512BW-NEXT:    vpermw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT:    vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT:    vpermw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vpermw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT:    vpermw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT:    vpermw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT:    retl
   %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)




More information about the llvm-commits mailing list