[llvm] r361814 - [X86] Custom lower CONCAT_VECTORS of v2i1
Benjamin Kramer via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 05:52:57 PDT 2019
Author: d0k
Date: Tue May 28 05:52:57 2019
New Revision: 361814
URL: http://llvm.org/viewvc/llvm-project?rev=361814&view=rev
Log:
[X86] Custom lower CONCAT_VECTORS of v2i1
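The generic legalizer cannot expand CONCAT_VECTORS of v2i1: its stack-store
expansion computes the element size in whole bytes, which is zero for i1.
Custom lower the node on X86 instead, and add an assert to the generic
expansion rather than silently miscompiling vectors with elements smaller
than 8 bits.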
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/vec_saddo.ll
llvm/trunk/test/CodeGen/X86/vec_smulo.ll
llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
llvm/trunk/test/CodeGen/X86/vec_uaddo.ll
llvm/trunk/test/CodeGen/X86/vec_umulo.ll
llvm/trunk/test/CodeGen/X86/vec_usubo.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue May 28 05:52:57 2019
@@ -1415,6 +1415,7 @@ SDValue SelectionDAGLegalize::ExpandVect
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue May 28 05:52:57 2019
@@ -1357,19 +1357,14 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
}
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v2i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Tue May 28 05:52:57 2019
@@ -2252,3 +2252,107 @@ define i128 @test_insertelement_variable
%t4 = bitcast <128 x i1> %t3 to i128
ret i128 %t4
}
+
+define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
+; KNL-LABEL: test_concat_v2i1:
+; KNL: ## %bb.0:
+; KNL-NEXT: movswl (%rdi), %eax
+; KNL-NEXT: vmovd %eax, %xmm0
+; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
+; KNL-NEXT: movswl 2(%rdi), %eax
+; KNL-NEXT: vmovd %eax, %xmm1
+; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
+; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; KNL-NEXT: vucomiss %xmm2, %xmm1
+; KNL-NEXT: setb %al
+; KNL-NEXT: kmovw %eax, %k0
+; KNL-NEXT: kshiftlw $1, %k0, %k0
+; KNL-NEXT: vucomiss %xmm2, %xmm0
+; KNL-NEXT: setb %al
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: korw %k0, %k1, %k0
+; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vucomiss %xmm2, %xmm1
+; KNL-NEXT: seta %al
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: vucomiss %xmm2, %xmm0
+; KNL-NEXT: seta %al
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: korw %k1, %k2, %k1
+; KNL-NEXT: kandw %k1, %k0, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k2
+; KNL-NEXT: movswl (%rsi), %eax
+; KNL-NEXT: vmovd %eax, %xmm0
+; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
+; KNL-NEXT: movswl 2(%rsi), %eax
+; KNL-NEXT: vmovd %eax, %xmm1
+; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
+; KNL-NEXT: vmovss %xmm1, %xmm0, %xmm1 {%k2} {z}
+; KNL-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: movw %ax, (%rdx)
+; KNL-NEXT: vcvtps2ph $4, %xmm1, %xmm0
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: movw %ax, 2(%rdx)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_concat_v2i1:
+; SKX: ## %bb.0:
+; SKX-NEXT: movswl (%rdi), %eax
+; SKX-NEXT: vmovd %eax, %xmm0
+; SKX-NEXT: vcvtph2ps %xmm0, %xmm0
+; SKX-NEXT: movswl 2(%rdi), %eax
+; SKX-NEXT: vmovd %eax, %xmm1
+; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
+; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SKX-NEXT: vucomiss %xmm2, %xmm1
+; SKX-NEXT: setb %al
+; SKX-NEXT: kmovd %eax, %k0
+; SKX-NEXT: kshiftlb $1, %k0, %k0
+; SKX-NEXT: vucomiss %xmm2, %xmm0
+; SKX-NEXT: setb %al
+; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: kshiftlb $7, %k1, %k1
+; SKX-NEXT: kshiftrb $7, %k1, %k1
+; SKX-NEXT: korw %k0, %k1, %k0
+; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vucomiss %xmm2, %xmm1
+; SKX-NEXT: seta %al
+; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: kshiftlb $1, %k1, %k1
+; SKX-NEXT: vucomiss %xmm2, %xmm0
+; SKX-NEXT: seta %al
+; SKX-NEXT: kmovd %eax, %k2
+; SKX-NEXT: kshiftlb $7, %k2, %k2
+; SKX-NEXT: kshiftrb $7, %k2, %k2
+; SKX-NEXT: korw %k1, %k2, %k1
+; SKX-NEXT: kandw %k1, %k0, %k1
+; SKX-NEXT: kshiftrb $1, %k1, %k2
+; SKX-NEXT: movswl (%rsi), %eax
+; SKX-NEXT: vmovd %eax, %xmm0
+; SKX-NEXT: vcvtph2ps %xmm0, %xmm0
+; SKX-NEXT: movswl 2(%rsi), %eax
+; SKX-NEXT: vmovd %eax, %xmm1
+; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
+; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm1 {%k2} {z}
+; SKX-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; SKX-NEXT: vmovd %xmm0, %eax
+; SKX-NEXT: movw %ax, (%rdx)
+; SKX-NEXT: vcvtps2ph $4, %xmm1, %xmm0
+; SKX-NEXT: vmovd %xmm0, %eax
+; SKX-NEXT: movw %ax, 2(%rdx)
+; SKX-NEXT: retq
+ %tmp = load <2 x half>, <2 x half>* %arg, align 8
+ %tmp3 = fcmp fast olt <2 x half> %tmp, <half 0xH4600, half 0xH4600>
+ %tmp4 = fcmp fast ogt <2 x half> %tmp, zeroinitializer
+ %tmp5 = and <2 x i1> %tmp3, %tmp4
+ %tmp6 = load <2 x half>, <2 x half>* %arg1, align 8
+ %tmp7 = select <2 x i1> %tmp5, <2 x half> %tmp6, <2 x half> zeroinitializer
+ store <2 x half> %tmp7, <2 x half>* %arg2, align 8
+ ret void
+}
Modified: llvm/trunk/test/CodeGen/X86/vec_saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_saddo.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_saddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_saddo.ll Tue May 28 05:52:57 2019
@@ -1871,7 +1871,8 @@ define <2 x i32> @saddo_v2i128(<2 x i128
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: sete %al
; AVX512-NEXT: andb %bl, %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
@@ -1884,8 +1885,9 @@ define <2 x i32> @saddo_v2i128(<2 x i128
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT: andl $1, %ecx
+; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %r14, 24(%r10)
Modified: llvm/trunk/test/CodeGen/X86/vec_smulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_smulo.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_smulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_smulo.ll Tue May 28 05:52:57 2019
@@ -2706,44 +2706,42 @@ define <2 x i32> @smulo_v2i128(<2 x i128
; AVX512-NEXT: pushq %r13
; AVX512-NEXT: pushq %r12
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: subq $40, %rsp
-; AVX512-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; AVX512-NEXT: movq %r8, %r15
-; AVX512-NEXT: movq %rdx, %rax
-; AVX512-NEXT: movq %rsi, %r12
-; AVX512-NEXT: movq %rdi, %rbx
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r9
+; AVX512-NEXT: subq $24, %rsp
+; AVX512-NEXT: movq %r8, %rax
+; AVX512-NEXT: movq %rcx, %r14
+; AVX512-NEXT: movq %rdx, %rbx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
-; AVX512-NEXT: movq %rax, %rdi
-; AVX512-NEXT: movq %rcx, %rsi
+; AVX512-NEXT: movq %rax, %rdx
; AVX512-NEXT: movq %r9, %rcx
; AVX512-NEXT: callq __muloti4
; AVX512-NEXT: movq %rax, %r13
; AVX512-NEXT: movq %rdx, %rbp
-; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; AVX512-NEXT: movq %rbx, %rdi
-; AVX512-NEXT: movq %r12, %rsi
-; AVX512-NEXT: movq %r15, %rdx
-; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; AVX512-NEXT: movq %r14, %rsi
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; AVX512-NEXT: movq %r12, %rcx
; AVX512-NEXT: callq __muloti4
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
-; AVX512-NEXT: movb %cl, {{[0-9]+}}(%rsp)
-; AVX512-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512-NEXT: movq %rbp, 24(%r14)
-; AVX512-NEXT: movq %r13, 16(%r14)
-; AVX512-NEXT: movq %rdx, 8(%r14)
-; AVX512-NEXT: movq %rax, (%r14)
+; AVX512-NEXT: kmovd %ecx, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
+; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
+; AVX512-NEXT: setne %cl
+; AVX512-NEXT: andl $1, %ecx
+; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
+; AVX512-NEXT: movq %rdx, 24(%r15)
+; AVX512-NEXT: movq %rax, 16(%r15)
+; AVX512-NEXT: movq %rbp, 8(%r15)
+; AVX512-NEXT: movq %r13, (%r15)
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: addq $40, %rsp
+; AVX512-NEXT: addq $24, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
; AVX512-NEXT: popq %r13
Modified: llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ssubo.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ssubo.ll Tue May 28 05:52:57 2019
@@ -1910,7 +1910,8 @@ define <2 x i32> @ssubo_v2i128(<2 x i128
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: andb %bl, %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
@@ -1923,8 +1924,9 @@ define <2 x i32> @ssubo_v2i128(<2 x i128
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT: andl $1, %ecx
+; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %r14, 24(%r10)
Modified: llvm/trunk/test/CodeGen/X86/vec_uaddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_uaddo.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_uaddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_uaddo.ll Tue May 28 05:52:57 2019
@@ -1336,12 +1336,14 @@ define <2 x i32> @uaddo_v2i128(<2 x i128
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setb %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %rcx, 24(%r10)
Modified: llvm/trunk/test/CodeGen/X86/vec_umulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_umulo.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_umulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_umulo.ll Tue May 28 05:52:57 2019
@@ -2575,7 +2575,8 @@ define <2 x i32> @umulo_v2i128(<2 x i128
; AVX512-NEXT: setb %al
; AVX512-NEXT: orb %cl, %al
; AVX512-NEXT: orb %r13b, %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setne %al
; AVX512-NEXT: testq %rsi, %rsi
@@ -2597,8 +2598,9 @@ define <2 x i32> @umulo_v2i128(<2 x i128
; AVX512-NEXT: setb %sil
; AVX512-NEXT: orb %bl, %sil
; AVX512-NEXT: orb %cl, %sil
-; AVX512-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT: andl $1, %esi
+; AVX512-NEXT: kmovw %esi, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: movq %r10, 16(%r14)
; AVX512-NEXT: movq %rax, (%r14)
; AVX512-NEXT: movq %r15, 24(%r14)
Modified: llvm/trunk/test/CodeGen/X86/vec_usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_usubo.ll?rev=361814&r1=361813&r2=361814&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_usubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_usubo.ll Tue May 28 05:52:57 2019
@@ -1378,12 +1378,14 @@ define <2 x i32> @usubo_v2i128(<2 x i128
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setb %al
-; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
-; AVX512-NEXT: kmovw -{{[0-9]+}}(%rsp), %k1
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
+; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %rcx, 24(%r10)
More information about the llvm-commits mailing list