[llvm] e578237 - [X86] Add CMOV_VK1 pseudo so we don't crash on v1i1 ISD::SELECT

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 15:14:42 PST 2020


Author: Craig Topper
Date: 2020-02-20T15:13:48-08:00
New Revision: e5782377f3f6184abc73a7098c9a0cea0a93350f

URL: https://github.com/llvm/llvm-project/commit/e5782377f3f6184abc73a7098c9a0cea0a93350f
DIFF: https://github.com/llvm/llvm-project/commit/e5782377f3f6184abc73a7098c9a0cea0a93350f.diff

LOG: [X86] Add CMOV_VK1 pseudo so we don't crash on v1i1 ISD::SELECT

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86InstrCompiler.td
    llvm/test/CodeGen/X86/avx512-select.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c56dd33c98ae..74583efddb8e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30949,6 +30949,7 @@ static bool isCMOVPseudo(MachineInstr &MI) {
   case X86::CMOV_VR256:
   case X86::CMOV_VR256X:
   case X86::CMOV_VR512:
+  case X86::CMOV_VK1:
   case X86::CMOV_VK2:
   case X86::CMOV_VK4:
   case X86::CMOV_VK8:
@@ -32590,6 +32591,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::CMOV_VR256:
   case X86::CMOV_VR256X:
   case X86::CMOV_VR512:
+  case X86::CMOV_VK1:
   case X86::CMOV_VK2:
   case X86::CMOV_VK4:
   case X86::CMOV_VK8:

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index e8f2f584cde0..c3300e0e97f5 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -548,6 +548,7 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
     defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
   }
   defm _VR512  : CMOVrr_PSEUDO<VR512, v8i64>;
+  defm _VK1    : CMOVrr_PSEUDO<VK1,  v1i1>;
   defm _VK2    : CMOVrr_PSEUDO<VK2,  v2i1>;
   defm _VK4    : CMOVrr_PSEUDO<VK4,  v4i1>;
   defm _VK8    : CMOVrr_PSEUDO<VK8,  v8i1>;

diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 5cbca0050177..2ac4057e6123 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -677,3 +677,95 @@ define void @vselect_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y) nounwind {
   store <1 x i1> %c, <1 x i1>* %x
   ret void
 }
+
+; Scalar condition with v1i1 operands
+define void @select_v1i1(<1 x i1>* %w, <1 x i1>* %x, <1 x i1>* %y, i1 %z) nounwind {
+; X86-AVX512F-LABEL: select_v1i1:
+; X86-AVX512F:       # %bb.0:
+; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512F-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-AVX512F-NEXT:    jne .LBB18_1
+; X86-AVX512F-NEXT:  # %bb.2:
+; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512F-NEXT:    movzbl (%edx), %edx
+; X86-AVX512F-NEXT:    kmovw %edx, %k0
+; X86-AVX512F-NEXT:    movzbl (%ecx), %ecx
+; X86-AVX512F-NEXT:    kmovw %ecx, %k1
+; X86-AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; X86-AVX512F-NEXT:    jmp .LBB18_3
+; X86-AVX512F-NEXT:  .LBB18_1:
+; X86-AVX512F-NEXT:    movzbl (%eax), %ecx
+; X86-AVX512F-NEXT:    kmovw %ecx, %k0
+; X86-AVX512F-NEXT:  .LBB18_3:
+; X86-AVX512F-NEXT:    kmovw %k0, %ecx
+; X86-AVX512F-NEXT:    movb %cl, (%eax)
+; X86-AVX512F-NEXT:    retl
+;
+; X64-AVX512F-LABEL: select_v1i1:
+; X64-AVX512F:       # %bb.0:
+; X64-AVX512F-NEXT:    testb $1, %cl
+; X64-AVX512F-NEXT:    jne .LBB18_1
+; X64-AVX512F-NEXT:  # %bb.2:
+; X64-AVX512F-NEXT:    movzbl (%rdx), %eax
+; X64-AVX512F-NEXT:    kmovw %eax, %k0
+; X64-AVX512F-NEXT:    movzbl (%rdi), %eax
+; X64-AVX512F-NEXT:    kmovw %eax, %k1
+; X64-AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; X64-AVX512F-NEXT:    jmp .LBB18_3
+; X64-AVX512F-NEXT:  .LBB18_1:
+; X64-AVX512F-NEXT:    movzbl (%rsi), %eax
+; X64-AVX512F-NEXT:    kmovw %eax, %k0
+; X64-AVX512F-NEXT:  .LBB18_3:
+; X64-AVX512F-NEXT:    kmovw %k0, %eax
+; X64-AVX512F-NEXT:    movb %al, (%rsi)
+; X64-AVX512F-NEXT:    retq
+;
+; X86-AVX512BW-LABEL: select_v1i1:
+; X86-AVX512BW:       # %bb.0:
+; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512BW-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-AVX512BW-NEXT:    jne .LBB18_1
+; X86-AVX512BW-NEXT:  # %bb.2:
+; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-AVX512BW-NEXT:    movzbl (%edx), %edx
+; X86-AVX512BW-NEXT:    kmovd %edx, %k0
+; X86-AVX512BW-NEXT:    movzbl (%ecx), %ecx
+; X86-AVX512BW-NEXT:    kmovd %ecx, %k1
+; X86-AVX512BW-NEXT:    kxorw %k1, %k0, %k0
+; X86-AVX512BW-NEXT:    jmp .LBB18_3
+; X86-AVX512BW-NEXT:  .LBB18_1:
+; X86-AVX512BW-NEXT:    movzbl (%eax), %ecx
+; X86-AVX512BW-NEXT:    kmovd %ecx, %k0
+; X86-AVX512BW-NEXT:  .LBB18_3:
+; X86-AVX512BW-NEXT:    kmovd %k0, %ecx
+; X86-AVX512BW-NEXT:    movb %cl, (%eax)
+; X86-AVX512BW-NEXT:    retl
+;
+; X64-AVX512BW-LABEL: select_v1i1:
+; X64-AVX512BW:       # %bb.0:
+; X64-AVX512BW-NEXT:    testb $1, %cl
+; X64-AVX512BW-NEXT:    jne .LBB18_1
+; X64-AVX512BW-NEXT:  # %bb.2:
+; X64-AVX512BW-NEXT:    movzbl (%rdx), %eax
+; X64-AVX512BW-NEXT:    kmovd %eax, %k0
+; X64-AVX512BW-NEXT:    movzbl (%rdi), %eax
+; X64-AVX512BW-NEXT:    kmovd %eax, %k1
+; X64-AVX512BW-NEXT:    kxorw %k1, %k0, %k0
+; X64-AVX512BW-NEXT:    jmp .LBB18_3
+; X64-AVX512BW-NEXT:  .LBB18_1:
+; X64-AVX512BW-NEXT:    movzbl (%rsi), %eax
+; X64-AVX512BW-NEXT:    kmovd %eax, %k0
+; X64-AVX512BW-NEXT:  .LBB18_3:
+; X64-AVX512BW-NEXT:    kmovd %k0, %eax
+; X64-AVX512BW-NEXT:    movb %al, (%rsi)
+; X64-AVX512BW-NEXT:    retq
+  %a = load <1 x i1>, <1 x i1>* %x
+  %b = load <1 x i1>, <1 x i1>* %y
+  %b2 = load <1 x i1>, <1 x i1>* %w
+  %b3 = xor <1 x i1> %b, %b2
+  %c = select i1 %z, <1 x i1> %a, <1 x i1> %b3
+  store <1 x i1> %c, <1 x i1>* %x
+  ret void
+}


        


More information about the llvm-commits mailing list