[llvm] 935d41e - [X86] Split v64i1 arguments into 2 v32i1s that will be promoted to v32i8 under min-legal-vector-width=256

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 17:29:39 PST 2019


Author: Craig Topper
Date: 2019-12-10T17:29:02-08:00
New Revision: 935d41e4bd6347155af10e50f8b24e86a5d626f7

URL: https://github.com/llvm/llvm-project/commit/935d41e4bd6347155af10e50f8b24e86a5d626f7
DIFF: https://github.com/llvm/llvm-project/commit/935d41e4bd6347155af10e50f8b24e86a5d626f7.diff

LOG: [X86] Split v64i1 arguments into 2 v32i1s that will be promoted to v32i8 under min-legal-vector-width=256

This is an improvement to commit 88dacbd43625cf7aad8a01c0c3b92142c4dc0970.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/min-legal-vector-width.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f434b7de51b6..866ee5b9a602 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2008,9 +2008,12 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
       Subtarget.hasAVX512() &&
       (!isPowerOf2_32(VT.getVectorNumElements()) ||
        (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 32 && !Subtarget.useBWIRegs()) ||
        (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
     return MVT::i8;
+  // Split v64i1 vectors if we don't have v64i8 available.
+  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+      CC != CallingConv::X86_RegCall)
+    return MVT::v32i1;
   // FIXME: Should we just make these types legal and custom split operations?
   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
@@ -2029,9 +2032,12 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
       Subtarget.hasAVX512() &&
       (!isPowerOf2_32(VT.getVectorNumElements()) ||
        (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 32 && !Subtarget.useBWIRegs()) ||
        (VT.getVectorNumElements() > 64 && Subtarget.hasBWI())))
     return VT.getVectorNumElements();
+  // Split v64i1 vectors if we don't have v64i8 available.
+  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+      CC != CallingConv::X86_RegCall)
+    return 2;
   // FIXME: Should we just make these types legal and custom split operations?
   if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !EnableOldKNLABI &&
       Subtarget.useAVX512Regs() && !Subtarget.hasBWI())
@@ -2047,7 +2053,6 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
       Subtarget.hasAVX512() &&
       (!isPowerOf2_32(VT.getVectorNumElements()) ||
        (VT.getVectorNumElements() > 16 && !Subtarget.hasBWI()) ||
-       (VT.getVectorNumElements() > 32 && !Subtarget.useBWIRegs()) ||
        (VT.getVectorNumElements() > 64 && Subtarget.hasBWI()))) {
     RegisterVT = MVT::i8;
     IntermediateVT = MVT::i1;
@@ -2055,6 +2060,15 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
     return NumIntermediates;
   }
 
+  // Split v64i1 vectors if we don't have v64i8 available.
+  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
+      CC != CallingConv::X86_RegCall) {
+    RegisterVT = MVT::v32i1;
+    IntermediateVT = MVT::v32i1;
+    NumIntermediates = 2;
+    return 2;
+  }
+
   return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
                                               NumIntermediates, RegisterVT);
 }

diff  --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index bf48a305a2ba..3273efd422c8 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1120,448 +1120,6 @@ define void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8>* %q) "min
 define <64 x i1> @v64i1_argument_return(<64 x i1> %x) "min-legal-vector-width"="256" {
 ; CHECK-LABEL: v64i1_argument_return:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    kmovd %esi, %k0
-; CHECK-NEXT:    kshiftlq $63, %k0, %k0
-; CHECK-NEXT:    kshiftrq $63, %k0, %k0
-; CHECK-NEXT:    kshiftlq $2, %k0, %k1
-; CHECK-NEXT:    kmovd %edx, %k2
-; CHECK-NEXT:    kshiftlq $1, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $62, %k0, %k0
-; CHECK-NEXT:    kshiftrq $62, %k0, %k0
-; CHECK-NEXT:    kshiftlq $3, %k0, %k1
-; CHECK-NEXT:    kmovd %ecx, %k2
-; CHECK-NEXT:    kshiftlq $2, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $61, %k0, %k0
-; CHECK-NEXT:    kshiftrq $61, %k0, %k0
-; CHECK-NEXT:    kshiftlq $4, %k0, %k1
-; CHECK-NEXT:    kmovd %r8d, %k2
-; CHECK-NEXT:    kshiftlq $3, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $60, %k0, %k0
-; CHECK-NEXT:    kshiftrq $60, %k0, %k0
-; CHECK-NEXT:    kshiftlq $5, %k0, %k1
-; CHECK-NEXT:    kmovd %r9d, %k2
-; CHECK-NEXT:    kshiftlq $4, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $59, %k0, %k0
-; CHECK-NEXT:    kshiftrq $59, %k0, %k0
-; CHECK-NEXT:    kshiftlq $6, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $5, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $58, %k0, %k0
-; CHECK-NEXT:    kshiftrq $58, %k0, %k0
-; CHECK-NEXT:    kshiftlq $7, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $6, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $57, %k0, %k0
-; CHECK-NEXT:    kshiftrq $57, %k0, %k0
-; CHECK-NEXT:    kshiftlq $8, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $7, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $56, %k0, %k0
-; CHECK-NEXT:    kshiftrq $56, %k0, %k0
-; CHECK-NEXT:    kshiftlq $9, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $8, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $55, %k0, %k0
-; CHECK-NEXT:    kshiftrq $55, %k0, %k0
-; CHECK-NEXT:    kshiftlq $10, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $9, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $54, %k0, %k0
-; CHECK-NEXT:    kshiftrq $54, %k0, %k0
-; CHECK-NEXT:    kshiftlq $11, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $10, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $53, %k0, %k0
-; CHECK-NEXT:    kshiftrq $53, %k0, %k0
-; CHECK-NEXT:    kshiftlq $12, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $11, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $52, %k0, %k0
-; CHECK-NEXT:    kshiftrq $52, %k0, %k0
-; CHECK-NEXT:    kshiftlq $13, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $12, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $51, %k0, %k0
-; CHECK-NEXT:    kshiftrq $51, %k0, %k0
-; CHECK-NEXT:    kshiftlq $14, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $13, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $50, %k0, %k0
-; CHECK-NEXT:    kshiftrq $50, %k0, %k0
-; CHECK-NEXT:    kshiftlq $15, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $14, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $49, %k0, %k0
-; CHECK-NEXT:    kshiftrq $49, %k0, %k0
-; CHECK-NEXT:    kshiftlq $16, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $15, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $48, %k0, %k0
-; CHECK-NEXT:    kshiftrq $48, %k0, %k0
-; CHECK-NEXT:    kshiftlq $17, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $16, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $47, %k0, %k0
-; CHECK-NEXT:    kshiftrq $47, %k0, %k0
-; CHECK-NEXT:    kshiftlq $18, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $17, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $46, %k0, %k0
-; CHECK-NEXT:    kshiftrq $46, %k0, %k0
-; CHECK-NEXT:    kshiftlq $19, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $18, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $45, %k0, %k0
-; CHECK-NEXT:    kshiftrq $45, %k0, %k0
-; CHECK-NEXT:    kshiftlq $20, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $19, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $44, %k0, %k0
-; CHECK-NEXT:    kshiftrq $44, %k0, %k0
-; CHECK-NEXT:    kshiftlq $21, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $20, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $43, %k0, %k0
-; CHECK-NEXT:    kshiftrq $43, %k0, %k0
-; CHECK-NEXT:    kshiftlq $22, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $21, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $42, %k0, %k0
-; CHECK-NEXT:    kshiftrq $42, %k0, %k0
-; CHECK-NEXT:    kshiftlq $23, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $22, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $41, %k0, %k0
-; CHECK-NEXT:    kshiftrq $41, %k0, %k0
-; CHECK-NEXT:    kshiftlq $24, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $23, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $40, %k0, %k0
-; CHECK-NEXT:    kshiftrq $40, %k0, %k0
-; CHECK-NEXT:    kshiftlq $25, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $24, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $39, %k0, %k0
-; CHECK-NEXT:    kshiftrq $39, %k0, %k0
-; CHECK-NEXT:    kshiftlq $26, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $25, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $38, %k0, %k0
-; CHECK-NEXT:    kshiftrq $38, %k0, %k0
-; CHECK-NEXT:    kshiftlq $27, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $26, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $37, %k0, %k0
-; CHECK-NEXT:    kshiftrq $37, %k0, %k0
-; CHECK-NEXT:    kshiftlq $28, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $27, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $36, %k0, %k0
-; CHECK-NEXT:    kshiftrq $36, %k0, %k0
-; CHECK-NEXT:    kshiftlq $29, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $28, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $35, %k0, %k0
-; CHECK-NEXT:    kshiftrq $35, %k0, %k0
-; CHECK-NEXT:    kshiftlq $30, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $29, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $34, %k0, %k0
-; CHECK-NEXT:    kshiftrq $34, %k0, %k0
-; CHECK-NEXT:    kshiftlq $31, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $30, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $33, %k0, %k0
-; CHECK-NEXT:    kshiftrq $33, %k0, %k0
-; CHECK-NEXT:    kshiftlq $32, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $31, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $32, %k0, %k0
-; CHECK-NEXT:    kshiftrq $32, %k0, %k0
-; CHECK-NEXT:    kshiftlq $33, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $32, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $31, %k0, %k0
-; CHECK-NEXT:    kshiftrq $31, %k0, %k0
-; CHECK-NEXT:    kshiftlq $34, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $33, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $30, %k0, %k0
-; CHECK-NEXT:    kshiftrq $30, %k0, %k0
-; CHECK-NEXT:    kshiftlq $35, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $34, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $29, %k0, %k0
-; CHECK-NEXT:    kshiftrq $29, %k0, %k0
-; CHECK-NEXT:    kshiftlq $36, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $35, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $28, %k0, %k0
-; CHECK-NEXT:    kshiftrq $28, %k0, %k0
-; CHECK-NEXT:    kshiftlq $37, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $36, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $27, %k0, %k0
-; CHECK-NEXT:    kshiftrq $27, %k0, %k0
-; CHECK-NEXT:    kshiftlq $38, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $37, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $26, %k0, %k0
-; CHECK-NEXT:    kshiftrq $26, %k0, %k0
-; CHECK-NEXT:    kshiftlq $39, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $38, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $25, %k0, %k0
-; CHECK-NEXT:    kshiftrq $25, %k0, %k0
-; CHECK-NEXT:    kshiftlq $40, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $39, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $24, %k0, %k0
-; CHECK-NEXT:    kshiftrq $24, %k0, %k0
-; CHECK-NEXT:    kshiftlq $41, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $40, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $23, %k0, %k0
-; CHECK-NEXT:    kshiftrq $23, %k0, %k0
-; CHECK-NEXT:    kshiftlq $42, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $41, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $22, %k0, %k0
-; CHECK-NEXT:    kshiftrq $22, %k0, %k0
-; CHECK-NEXT:    kshiftlq $43, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $42, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $21, %k0, %k0
-; CHECK-NEXT:    kshiftrq $21, %k0, %k0
-; CHECK-NEXT:    kshiftlq $44, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $43, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $20, %k0, %k0
-; CHECK-NEXT:    kshiftrq $20, %k0, %k0
-; CHECK-NEXT:    kshiftlq $45, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $44, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $19, %k0, %k0
-; CHECK-NEXT:    kshiftrq $19, %k0, %k0
-; CHECK-NEXT:    kshiftlq $46, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $45, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $18, %k0, %k0
-; CHECK-NEXT:    kshiftrq $18, %k0, %k0
-; CHECK-NEXT:    kshiftlq $47, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $46, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $17, %k0, %k0
-; CHECK-NEXT:    kshiftrq $17, %k0, %k0
-; CHECK-NEXT:    kshiftlq $48, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $47, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $16, %k0, %k0
-; CHECK-NEXT:    kshiftrq $16, %k0, %k0
-; CHECK-NEXT:    kshiftlq $49, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $48, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $15, %k0, %k0
-; CHECK-NEXT:    kshiftrq $15, %k0, %k0
-; CHECK-NEXT:    kshiftlq $50, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $49, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $14, %k0, %k0
-; CHECK-NEXT:    kshiftrq $14, %k0, %k0
-; CHECK-NEXT:    kshiftlq $51, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $50, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $13, %k0, %k0
-; CHECK-NEXT:    kshiftrq $13, %k0, %k0
-; CHECK-NEXT:    kshiftlq $52, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $51, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $12, %k0, %k0
-; CHECK-NEXT:    kshiftrq $12, %k0, %k0
-; CHECK-NEXT:    kshiftlq $53, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $52, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $11, %k0, %k0
-; CHECK-NEXT:    kshiftrq $11, %k0, %k0
-; CHECK-NEXT:    kshiftlq $54, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $53, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $10, %k0, %k0
-; CHECK-NEXT:    kshiftrq $10, %k0, %k0
-; CHECK-NEXT:    kshiftlq $55, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $54, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $9, %k0, %k0
-; CHECK-NEXT:    kshiftrq $9, %k0, %k0
-; CHECK-NEXT:    kshiftlq $56, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $55, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $8, %k0, %k0
-; CHECK-NEXT:    kshiftrq $8, %k0, %k0
-; CHECK-NEXT:    kshiftlq $57, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $56, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $7, %k0, %k0
-; CHECK-NEXT:    kshiftrq $7, %k0, %k0
-; CHECK-NEXT:    kshiftlq $58, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $57, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $6, %k0, %k0
-; CHECK-NEXT:    kshiftrq $6, %k0, %k0
-; CHECK-NEXT:    kshiftlq $59, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $58, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $5, %k0, %k0
-; CHECK-NEXT:    kshiftrq $5, %k0, %k0
-; CHECK-NEXT:    kshiftlq $60, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $59, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $4, %k0, %k0
-; CHECK-NEXT:    kshiftrq $4, %k0, %k0
-; CHECK-NEXT:    kshiftlq $61, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $60, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $3, %k0, %k0
-; CHECK-NEXT:    kshiftrq $3, %k0, %k0
-; CHECK-NEXT:    kshiftlq $62, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $61, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $2, %k0, %k0
-; CHECK-NEXT:    kshiftrq $2, %k0, %k0
-; CHECK-NEXT:    kshiftlq $63, %k0, %k1
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
-; CHECK-NEXT:    kshiftlq $62, %k2, %k2
-; CHECK-NEXT:    korq %k2, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
-; CHECK-NEXT:    kshiftlq $1, %k0, %k0
-; CHECK-NEXT:    kshiftrq $1, %k0, %k0
-; CHECK-NEXT:    kshiftlq $63, %k1, %k1
-; CHECK-NEXT:    korq %k1, %k0, %k0
-; CHECK-NEXT:    kmovq %k0, (%rdi)
 ; CHECK-NEXT:    retq
   ret <64 x i1> %x
 }


        


More information about the llvm-commits mailing list