[llvm] r326670 - [X86] Combine (store (v1i1 (scalar_to_vector (i8 X)))) -> (store (i8 X)).

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 3 17:48:02 PST 2018


Author: ctopper
Date: Sat Mar  3 17:48:02 2018
New Revision: 326670

URL: http://llvm.org/viewvc/llvm-project?rev=326670&view=rev
Log:
[X86] Combine (store (v1i1 (scalar_to_vector (i8 X)))) -> (store (i8 X)).

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=326670&r1=326669&r2=326670&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar  3 17:48:02 2018
@@ -35021,6 +35021,16 @@ static SDValue combineStore(SDNode *N, S
   SDValue StoredVal = St->getOperand(1);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
+  // If this is a store of a v1i1 scalar_to_vector of an i8 scalar, just store
+  // the i8 directly. This avoids a copy to a k-register.
+  if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
+      StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+      StoredVal.getOperand(0).getValueType() == MVT::i8) {
+    return DAG.getStore(St->getChain(), dl, StoredVal.getOperand(0),
+                        St->getBasePtr(), St->getPointerInfo(),
+                        St->getAlignment(), St->getMemOperand()->getFlags());
+  }
+
   // If we are saving a concatenation of two XMM registers and 32-byte stores
   // are slow, such as on Sandy Bridge, perform two 16-byte stores.
   bool Fast;

Modified: llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll?rev=326670&r1=326669&r2=326670&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-load-trunc-store-i1.ll Sat Mar  3 17:48:02 2018
@@ -5,8 +5,8 @@
 define void @load_v1i2_trunc_v1i1_store(<1 x i2>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i2_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store:
@@ -22,8 +22,8 @@ define void @load_v1i2_trunc_v1i1_store(
 define void @load_v1i3_trunc_v1i1_store(<1 x i3>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i3_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store:
@@ -39,8 +39,8 @@ define void @load_v1i3_trunc_v1i1_store(
 define void @load_v1i4_trunc_v1i1_store(<1 x i4>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i4_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store:
@@ -56,8 +56,8 @@ define void @load_v1i4_trunc_v1i1_store(
 define void @load_v1i8_trunc_v1i1_store(<1 x i8>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i8_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store:
@@ -73,8 +73,8 @@ define void @load_v1i8_trunc_v1i1_store(
 define void @load_v1i16_trunc_v1i1_store(<1 x i16>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i16_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store:
@@ -90,8 +90,8 @@ define void @load_v1i16_trunc_v1i1_store
 define void @load_v1i32_trunc_v1i1_store(<1 x i32>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i32_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store:
@@ -107,8 +107,8 @@ define void @load_v1i32_trunc_v1i1_store
 define void @load_v1i64_trunc_v1i1_store(<1 x i64>* %a0,<1 x i1>* %a1) {
 ; AVX512-ALL-LABEL: load_v1i64_trunc_v1i1_store:
 ; AVX512-ALL:       # %bb.0:
-; AVX512-ALL-NEXT:    kmovb (%rdi), %k0
-; AVX512-ALL-NEXT:    kmovb %k0, (%rsi)
+; AVX512-ALL-NEXT:    movb (%rdi), %al
+; AVX512-ALL-NEXT:    movb %al, (%rsi)
 ; AVX512-ALL-NEXT:    retq
 ;
 ; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store:




More information about the llvm-commits mailing list