[llvm] r326679 - [X86] Add a DAG combine to turn stores of vXi1 constants into scalar stores.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 4 11:33:15 PST 2018
Author: ctopper
Date: Sun Mar 4 11:33:15 2018
New Revision: 326679
URL: http://llvm.org/viewvc/llvm-project?rev=326679&view=rev
Log:
[X86] Add a DAG combine to turn stores of vXi1 constants into scalar stores.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Mar 4 11:33:15 2018
@@ -35032,6 +35032,53 @@ static SDValue combineStore(SDNode *N, S
St->getAlignment(), St->getMemOperand()->getFlags());
}
+ // Widen v2i1/v4i1 stores to v8i1.
+ if ((VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
+ Subtarget.hasAVX512()) {
+ unsigned NumConcats = 8 / VT.getVectorNumElements();
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(VT));
+ Ops[0] = StoredVal;
+ StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+
+ // Turn vXi1 stores of constants into a scalar store.
+ if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
+ VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
+ ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
+ // If it's a v64i1 store without 64-bit support, we need two stores.
+ if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
+ SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
+ StoredVal->ops().slice(0, 32));
+ Lo = combinevXi1ConstantToInteger(Lo, DAG);
+ SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
+ StoredVal->ops().slice(32, 32));
+ Hi = combinevXi1ConstantToInteger(Hi, DAG);
+
+ unsigned Alignment = St->getAlignment();
+
+ SDValue Ptr0 = St->getBasePtr();
+ SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, 4, dl);
+
+ SDValue Ch0 =
+ DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
+ Alignment, St->getMemOperand()->getFlags());
+ SDValue Ch1 =
+ DAG.getStore(St->getChain(), dl, Hi, Ptr1,
+ St->getPointerInfo().getWithOffset(4),
+ MinAlign(Alignment, 4U),
+ St->getMemOperand()->getFlags());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
+ }
+
+ StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
+ return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+ St->getPointerInfo(), St->getAlignment(),
+ St->getMemOperand()->getFlags());
+ }
+
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
bool Fast;
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Mar 4 11:33:15 2018
@@ -2809,10 +2809,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$
// Load/store kreg
let Predicates = [HasDQI] in {
- def : Pat<(store VK4:$src, addr:$dst),
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
- def : Pat<(store VK2:$src, addr:$dst),
- (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
def : Pat<(store VK1:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Sun Mar 4 11:33:15 2018
@@ -447,43 +447,19 @@ define i8 @zext_test3(<16 x i32> %a, <16
}
define i8 @conv1(<8 x i1>* %R) {
-; KNL-LABEL: conv1:
-; KNL: ## %bb.0: ## %entry
-; KNL-NEXT: movb $-1, (%rdi)
-; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
-; KNL-NEXT: movb $-2, %al
-; KNL-NEXT: retq
-;
-; SKX-LABEL: conv1:
-; SKX: ## %bb.0: ## %entry
-; SKX-NEXT: kxnorw %k0, %k0, %k0
-; SKX-NEXT: kmovb %k0, (%rdi)
-; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
-; SKX-NEXT: movb $-2, %al
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: conv1:
-; AVX512BW: ## %bb.0: ## %entry
-; AVX512BW-NEXT: movb $-1, (%rdi)
-; AVX512BW-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
-; AVX512BW-NEXT: movb $-2, %al
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: conv1:
-; AVX512DQ: ## %bb.0: ## %entry
-; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, (%rdi)
-; AVX512DQ-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: movb $-2, %al
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: conv1:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: movb $-1, (%rdi)
+; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb $-2, %al
+; CHECK-NEXT: retq
;
; X86-LABEL: conv1:
; X86: ## %bb.0: ## %entry
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kxnorw %k0, %k0, %k0
-; X86-NEXT: kmovb %k0, (%eax)
+; X86-NEXT: movb $-1, (%eax)
; X86-NEXT: movb $-2, (%esp)
; X86-NEXT: movb $-2, %al
; X86-NEXT: addl $12, %esp
@@ -3422,43 +3398,17 @@ entry:
}
define void @store_v64i1_constant(<64 x i1>* %R) {
-; KNL-LABEL: store_v64i1_constant:
-; KNL: ## %bb.0: ## %entry
-; KNL-NEXT: kxnorw %k0, %k0, %k0
-; KNL-NEXT: kmovw %k0, 2(%rdi)
-; KNL-NEXT: movl $-536871045, 4(%rdi) ## imm = 0xDFFFFF7B
-; KNL-NEXT: movw $-4099, (%rdi) ## imm = 0xEFFD
-; KNL-NEXT: retq
-;
-; SKX-LABEL: store_v64i1_constant:
-; SKX: ## %bb.0: ## %entry
-; SKX-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; SKX-NEXT: movq %rax, (%rdi)
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: store_v64i1_constant:
-; AVX512BW: ## %bb.0: ## %entry
-; AVX512BW-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; AVX512BW-NEXT: movq %rax, (%rdi)
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: store_v64i1_constant:
-; AVX512DQ: ## %bb.0: ## %entry
-; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
-; AVX512DQ-NEXT: movl $-536871045, 4(%rdi) ## imm = 0xDFFFFF7B
-; AVX512DQ-NEXT: movw $-4099, (%rdi) ## imm = 0xEFFD
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: store_v64i1_constant:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
;
; X86-LABEL: store_v64i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $-4099, %ecx ## imm = 0xEFFD
-; X86-NEXT: kmovd %ecx, %k0
-; X86-NEXT: movl $-536871045, %ecx ## imm = 0xDFFFFF7B
-; X86-NEXT: kmovd %ecx, %k1
-; X86-NEXT: kunpckdq %k0, %k1, %k0
-; X86-NEXT: kmovq %k0, (%eax)
+; X86-NEXT: movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
+; X86-NEXT: movl $-4099, (%eax) ## imm = 0xEFFD
; X86-NEXT: retl
entry:
store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
@@ -3466,36 +3416,15 @@ entry:
}
define void @store_v2i1_constant(<2 x i1>* %R) {
-; KNL-LABEL: store_v2i1_constant:
-; KNL: ## %bb.0: ## %entry
-; KNL-NEXT: movb $1, (%rdi)
-; KNL-NEXT: retq
-;
-; SKX-LABEL: store_v2i1_constant:
-; SKX: ## %bb.0: ## %entry
-; SKX-NEXT: movb $1, %al
-; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: kmovb %k0, (%rdi)
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: store_v2i1_constant:
-; AVX512BW: ## %bb.0: ## %entry
-; AVX512BW-NEXT: movb $1, (%rdi)
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: store_v2i1_constant:
-; AVX512DQ: ## %bb.0: ## %entry
-; AVX512DQ-NEXT: movb $1, %al
-; AVX512DQ-NEXT: kmovw %eax, %k0
-; AVX512DQ-NEXT: kmovb %k0, (%rdi)
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: store_v2i1_constant:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: movb $1, (%rdi)
+; CHECK-NEXT: retq
;
; X86-LABEL: store_v2i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $1, %cl
-; X86-NEXT: kmovd %ecx, %k0
-; X86-NEXT: kmovb %k0, (%eax)
+; X86-NEXT: movb $1, (%eax)
; X86-NEXT: retl
entry:
store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
@@ -3503,36 +3432,15 @@ entry:
}
define void @store_v4i1_constant(<4 x i1>* %R) {
-; KNL-LABEL: store_v4i1_constant:
-; KNL: ## %bb.0: ## %entry
-; KNL-NEXT: movb $5, (%rdi)
-; KNL-NEXT: retq
-;
-; SKX-LABEL: store_v4i1_constant:
-; SKX: ## %bb.0: ## %entry
-; SKX-NEXT: movb $5, %al
-; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: kmovb %k0, (%rdi)
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: store_v4i1_constant:
-; AVX512BW: ## %bb.0: ## %entry
-; AVX512BW-NEXT: movb $5, (%rdi)
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: store_v4i1_constant:
-; AVX512DQ: ## %bb.0: ## %entry
-; AVX512DQ-NEXT: movb $5, %al
-; AVX512DQ-NEXT: kmovw %eax, %k0
-; AVX512DQ-NEXT: kmovb %k0, (%rdi)
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: store_v4i1_constant:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: movb $5, (%rdi)
+; CHECK-NEXT: retq
;
; X86-LABEL: store_v4i1_constant:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb $5, %cl
-; X86-NEXT: kmovd %ecx, %k0
-; X86-NEXT: kmovb %k0, (%eax)
+; X86-NEXT: movb $5, (%eax)
; X86-NEXT: retl
entry:
store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=326679&r1=326678&r2=326679&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sun Mar 4 11:33:15 2018
@@ -6943,16 +6943,14 @@ define i8 @zext_test3(<16 x i32> %a, <16
define i8 @conv1(<8 x i1>* %R) {
; GENERIC-LABEL: conv1:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movb $-1, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: conv1:
; SKX: # %bb.0: # %entry
-; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00]
; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; SKX-NEXT: movb $-2, %al # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
More information about the llvm-commits mailing list