[llvm] r326679 - [X86] Add a DAG combine to turn stores of vXi1 constants into scalar stores.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 4 11:33:15 PST 2018


Author: ctopper
Date: Sun Mar  4 11:33:15 2018
New Revision: 326679

URL: http://llvm.org/viewvc/llvm-project?rev=326679&view=rev
Log:
[X86] Add a DAG combine to turn stores of vXi1 constants into scalar stores.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Mar  4 11:33:15 2018
@@ -35032,6 +35032,53 @@ static SDValue combineStore(SDNode *N, S
                         St->getAlignment(), St->getMemOperand()->getFlags());
   }
 
+  // Widen v2i1/v4i1 stores to v8i1.
+  if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+      Subtarget.hasAVX512()) {
+    unsigned NumConcats = 8 / VT.getVectorNumElements();
+    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+    Ops[0] = StoredVal;
+    StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                        St->getPointerInfo(), St->getAlignment(),
+                        St->getMemOperand()->getFlags());
+  }
+
+  // Turn vXi1 stores of constants into a scalar store.
+  if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
+       VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
+      ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
+    // If it's a v64i1 store without 64-bit support, we need two stores.
+    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+      SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
+                                      StoredVal->ops().slice(0, 32));
+      Lo = combinevXi1ConstantToInteger(Lo, DAG);
+      SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
+                                      StoredVal->ops().slice(32, 32));
+      Hi = combinevXi1ConstantToInteger(Hi, DAG);
+
+      unsigned Alignment = St->getAlignment();
+
+      SDValue Ptr0 = St->getBasePtr();
+      SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl);
+
+      SDValue Ch0 =
+          DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
+                       Alignment, St->getMemOperand()->getFlags());
+      SDValue Ch1 =
+          DAG.getStore(St->getChain(), dl, Hi, Ptr1,
+                       St->getPointerInfo().getWithOffset(4),
+                       MinAlign(Alignment, 4U),
+                       St->getMemOperand()->getFlags());
+      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+    }
+
+    StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
+    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                        St->getPointerInfo(), St->getAlignment(),
+                        St->getMemOperand()->getFlags());
+  }
+
   // If we are saving a concatenation of two XMM registers and 32-byte stores
   // are slow, such as on Sandy Bridge, perform two 16-byte stores.
   bool Fast;

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Mar  4 11:33:15 2018
@@ -2809,10 +2809,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$
 
 // Load/store kreg
 let Predicates = [HasDQI] in {
-  def : Pat<(store VK4:$src, addr:$dst),
-            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
-  def : Pat<(store VK2:$src, addr:$dst),
-            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
   def : Pat<(store VK1:$src, addr:$dst),
             (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Sun Mar  4 11:33:15 2018
@@ -447,43 +447,19 @@ define i8 @zext_test3(<16 x i32> %a, <16
 }
 
 define i8 @conv1(<8 x i1>* %R) {
-; KNL-LABEL: conv1:
-; KNL:       ## %bb.0: ## %entry
-; KNL-NEXT:    movb $-1, (%rdi)
-; KNL-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
-; KNL-NEXT:    movb $-2, %al
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: conv1:
-; SKX:       ## %bb.0: ## %entry
-; SKX-NEXT:    kxnorw %k0, %k0, %k0
-; SKX-NEXT:    kmovb %k0, (%rdi)
-; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
-; SKX-NEXT:    movb $-2, %al
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: conv1:
-; AVX512BW:       ## %bb.0: ## %entry
-; AVX512BW-NEXT:    movb $-1, (%rdi)
-; AVX512BW-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT:    movb $-2, %al
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: conv1:
-; AVX512DQ:       ## %bb.0: ## %entry
-; AVX512DQ-NEXT:    kxnorw %k0, %k0, %k0
-; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
-; AVX512DQ-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT:    movb $-2, %al
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: conv1:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movb $-1, (%rdi)
+; CHECK-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movb $-2, %al
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: conv1:
 ; X86:       ## %bb.0: ## %entry
 ; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kxnorw %k0, %k0, %k0
-; X86-NEXT:    kmovb %k0, (%eax)
+; X86-NEXT:    movb $-1, (%eax)
 ; X86-NEXT:    movb $-2, (%esp)
 ; X86-NEXT:    movb $-2, %al
 ; X86-NEXT:    addl $12, %esp
@@ -3422,43 +3398,17 @@ entry:
 }
 
 define void @store_v64i1_constant(<64 x i1>* %R) {
-; KNL-LABEL: store_v64i1_constant:
-; KNL:       ## %bb.0: ## %entry
-; KNL-NEXT:    kxnorw %k0, %k0, %k0
-; KNL-NEXT:    kmovw %k0, 2(%rdi)
-; KNL-NEXT:    movl $-536871045, 4(%rdi) ## imm = 0xDFFFFF7B
-; KNL-NEXT:    movw $-4099, (%rdi) ## imm = 0xEFFD
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: store_v64i1_constant:
-; SKX:       ## %bb.0: ## %entry
-; SKX-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; SKX-NEXT:    movq %rax, (%rdi)
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: store_v64i1_constant:
-; AVX512BW:       ## %bb.0: ## %entry
-; AVX512BW-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; AVX512BW-NEXT:    movq %rax, (%rdi)
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: store_v64i1_constant:
-; AVX512DQ:       ## %bb.0: ## %entry
-; AVX512DQ-NEXT:    kxnorw %k0, %k0, %k0
-; AVX512DQ-NEXT:    kmovw %k0, 2(%rdi)
-; AVX512DQ-NEXT:    movl $-536871045, 4(%rdi) ## imm = 0xDFFFFF7B
-; AVX512DQ-NEXT:    movw $-4099, (%rdi) ## imm = 0xEFFD
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: store_v64i1_constant:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: store_v64i1_constant:
 ; X86:       ## %bb.0: ## %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl $-4099, %ecx ## imm = 0xEFFD
-; X86-NEXT:    kmovd %ecx, %k0
-; X86-NEXT:    movl $-536871045, %ecx ## imm = 0xDFFFFF7B
-; X86-NEXT:    kmovd %ecx, %k1
-; X86-NEXT:    kunpckdq %k0, %k1, %k0
-; X86-NEXT:    kmovq %k0, (%eax)
+; X86-NEXT:    movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
+; X86-NEXT:    movl $-4099, (%eax) ## imm = 0xEFFD
 ; X86-NEXT:    retl
 entry:
   store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
@@ -3466,36 +3416,15 @@ entry:
 }
 
 define void @store_v2i1_constant(<2 x i1>* %R) {
-; KNL-LABEL: store_v2i1_constant:
-; KNL:       ## %bb.0: ## %entry
-; KNL-NEXT:    movb $1, (%rdi)
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: store_v2i1_constant:
-; SKX:       ## %bb.0: ## %entry
-; SKX-NEXT:    movb $1, %al
-; SKX-NEXT:    kmovd %eax, %k0
-; SKX-NEXT:    kmovb %k0, (%rdi)
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: store_v2i1_constant:
-; AVX512BW:       ## %bb.0: ## %entry
-; AVX512BW-NEXT:    movb $1, (%rdi)
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: store_v2i1_constant:
-; AVX512DQ:       ## %bb.0: ## %entry
-; AVX512DQ-NEXT:    movb $1, %al
-; AVX512DQ-NEXT:    kmovw %eax, %k0
-; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: store_v2i1_constant:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movb $1, (%rdi)
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: store_v2i1_constant:
 ; X86:       ## %bb.0: ## %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movb $1, %cl
-; X86-NEXT:    kmovd %ecx, %k0
-; X86-NEXT:    kmovb %k0, (%eax)
+; X86-NEXT:    movb $1, (%eax)
 ; X86-NEXT:    retl
 entry:
   store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
@@ -3503,36 +3432,15 @@ entry:
 }
 
 define void @store_v4i1_constant(<4 x i1>* %R) {
-; KNL-LABEL: store_v4i1_constant:
-; KNL:       ## %bb.0: ## %entry
-; KNL-NEXT:    movb $5, (%rdi)
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: store_v4i1_constant:
-; SKX:       ## %bb.0: ## %entry
-; SKX-NEXT:    movb $5, %al
-; SKX-NEXT:    kmovd %eax, %k0
-; SKX-NEXT:    kmovb %k0, (%rdi)
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: store_v4i1_constant:
-; AVX512BW:       ## %bb.0: ## %entry
-; AVX512BW-NEXT:    movb $5, (%rdi)
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: store_v4i1_constant:
-; AVX512DQ:       ## %bb.0: ## %entry
-; AVX512DQ-NEXT:    movb $5, %al
-; AVX512DQ-NEXT:    kmovw %eax, %k0
-; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: store_v4i1_constant:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movb $5, (%rdi)
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: store_v4i1_constant:
 ; X86:       ## %bb.0: ## %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movb $5, %cl
-; X86-NEXT:    kmovd %ecx, %k0
-; X86-NEXT:    kmovb %k0, (%eax)
+; X86-NEXT:    movb $5, (%eax)
 ; X86-NEXT:    retl
 entry:
   store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sun Mar  4 11:33:15 2018
@@ -6943,16 +6943,14 @@ define i8 @zext_test3(<16 x i32> %a, <16
 define i8 @conv1(<8 x i1>* %R) {
 ; GENERIC-LABEL: conv1:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT:    movb $-1, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; GENERIC-NEXT:    movb $-2, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: conv1:
 ; SKX:       # %bb.0: # %entry
-; SKX-NEXT:    kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT:    movb $-1, (%rdi) # sched: [1:1.00]
 ; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
 ; SKX-NEXT:    movb $-2, %al # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]




More information about the llvm-commits mailing list