[llvm] r365711 - [SDAG] commute setcc operands to match a subtract
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 16:23:55 PDT 2019
Author: spatel
Date: Wed Jul 10 16:23:54 2019
New Revision: 365711
URL: http://llvm.org/viewvc/llvm-project?rev=365711&view=rev
Log:
[SDAG] commute setcc operands to match a subtract
If we have:
  R = sub X, Y
  P = cmp Y, X
...then flipping the operands in the compare instruction can allow using a subtract that sets compare flags.
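As a rough sketch (a hypothetical C++ function, not taken from this patch or its tests), the source-level pattern that benefits looks like this:

  // Illustration only: the subtraction and the comparison use the same two
  // operands, but in opposite order.
  int sub_and_compare(int x, int y) {
    int r = x - y;   // R = sub X, Y
    if (y < x)       // P = cmp Y, X -> commuted to cmp X, Y with a swapped predicate
      return r;
    return 0;
  }

On a target with a flag-setting subtract (for example AArch64's subs), the commuted compare can reuse the flags produced by the subtraction instead of emitting a separate cmp.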
Motivated by diffs in D58875 - not sure if this changes anything there,
but this seems like a good thing independent of that.
There's a more involved version of this transform already in IR (in instcombine,
although that seems misplaced to me) - see "swapMayExposeCSEOpportunities()".
Differential Revision: https://reviews.llvm.org/D63958
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
    llvm/trunk/test/CodeGen/AArch64/cgp-usubo.ll
    llvm/trunk/test/CodeGen/Lanai/sub-cmp-peephole.ll
    llvm/trunk/test/CodeGen/X86/jump_sign.ll
    llvm/trunk/test/CodeGen/X86/psubus.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=365711&r1=365710&r2=365711&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Wed Jul 10 16:23:54 2019
@@ -2674,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EV
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+ // If we have a subtract with the same 2 non-constant operands as this setcc
+ // -- but in reverse order -- then try to commute the operands of this setcc
+ // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+ // instruction on some targets.
+ if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+ DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
+ !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll?rev=365711&r1=365710&r2=365711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-shrink-wrapping.ll Wed Jul 10 16:23:54 2019
@@ -1037,15 +1037,14 @@ define void @stack_realign2(i32 %a, i32
; ENABLE-NEXT: lsl w8, w0, w1
; ENABLE-NEXT: lsl w9, w1, w0
; ENABLE-NEXT: lsr w10, w0, w1
-; ENABLE-NEXT: lsr w11, w1, w0
-; ENABLE-NEXT: add w12, w1, w0
-; ENABLE-NEXT: sub w13, w1, w0
-; ENABLE-NEXT: cmp w0, w1
-; ENABLE-NEXT: add w17, w8, w9
-; ENABLE-NEXT: sub w16, w9, w10
-; ENABLE-NEXT: add w15, w10, w11
-; ENABLE-NEXT: add w14, w11, w12
-; ENABLE-NEXT: b.ge LBB14_2
+; ENABLE-NEXT: lsr w12, w1, w0
+; ENABLE-NEXT: add w15, w1, w0
+; ENABLE-NEXT: subs w17, w1, w0
+; ENABLE-NEXT: sub w11, w9, w10
+; ENABLE-NEXT: add w16, w8, w9
+; ENABLE-NEXT: add w13, w10, w12
+; ENABLE-NEXT: add w14, w12, w15
+; ENABLE-NEXT: b.le LBB14_2
; ENABLE-NEXT: ; %bb.1: ; %true
; ENABLE-NEXT: str w0, [sp]
; ENABLE-NEXT: ; InlineAsm Start
@@ -1055,12 +1054,12 @@ define void @stack_realign2(i32 %a, i32
; ENABLE-NEXT: str w8, [x2]
; ENABLE-NEXT: str w9, [x3]
; ENABLE-NEXT: str w10, [x4]
-; ENABLE-NEXT: str w11, [x5]
-; ENABLE-NEXT: str w12, [x6]
-; ENABLE-NEXT: str w13, [x7]
+; ENABLE-NEXT: str w12, [x5]
+; ENABLE-NEXT: str w15, [x6]
+; ENABLE-NEXT: str w17, [x7]
; ENABLE-NEXT: stp w0, w1, [x2, #4]
-; ENABLE-NEXT: stp w17, w16, [x2, #12]
-; ENABLE-NEXT: stp w15, w14, [x2, #20]
+; ENABLE-NEXT: stp w16, w11, [x2, #12]
+; ENABLE-NEXT: stp w13, w14, [x2, #20]
; ENABLE-NEXT: sub sp, x29, #80 ; =80
; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
@@ -1097,15 +1096,14 @@ define void @stack_realign2(i32 %a, i32
; DISABLE-NEXT: lsl w8, w0, w1
; DISABLE-NEXT: lsl w9, w1, w0
; DISABLE-NEXT: lsr w10, w0, w1
-; DISABLE-NEXT: lsr w11, w1, w0
-; DISABLE-NEXT: add w12, w1, w0
-; DISABLE-NEXT: sub w13, w1, w0
-; DISABLE-NEXT: cmp w0, w1
-; DISABLE-NEXT: add w17, w8, w9
-; DISABLE-NEXT: sub w16, w9, w10
-; DISABLE-NEXT: add w15, w10, w11
-; DISABLE-NEXT: add w14, w11, w12
-; DISABLE-NEXT: b.ge LBB14_2
+; DISABLE-NEXT: lsr w12, w1, w0
+; DISABLE-NEXT: add w15, w1, w0
+; DISABLE-NEXT: subs w17, w1, w0
+; DISABLE-NEXT: sub w11, w9, w10
+; DISABLE-NEXT: add w16, w8, w9
+; DISABLE-NEXT: add w13, w10, w12
+; DISABLE-NEXT: add w14, w12, w15
+; DISABLE-NEXT: b.le LBB14_2
; DISABLE-NEXT: ; %bb.1: ; %true
; DISABLE-NEXT: str w0, [sp]
; DISABLE-NEXT: ; InlineAsm Start
@@ -1115,12 +1113,12 @@ define void @stack_realign2(i32 %a, i32
; DISABLE-NEXT: str w8, [x2]
; DISABLE-NEXT: str w9, [x3]
; DISABLE-NEXT: str w10, [x4]
-; DISABLE-NEXT: str w11, [x5]
-; DISABLE-NEXT: str w12, [x6]
-; DISABLE-NEXT: str w13, [x7]
+; DISABLE-NEXT: str w12, [x5]
+; DISABLE-NEXT: str w15, [x6]
+; DISABLE-NEXT: str w17, [x7]
; DISABLE-NEXT: stp w0, w1, [x2, #4]
-; DISABLE-NEXT: stp w17, w16, [x2, #12]
-; DISABLE-NEXT: stp w15, w14, [x2, #20]
+; DISABLE-NEXT: stp w16, w11, [x2, #12]
+; DISABLE-NEXT: stp w13, w14, [x2, #20]
; DISABLE-NEXT: sub sp, x29, #80 ; =80
; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload
Modified: llvm/trunk/test/CodeGen/AArch64/cgp-usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/cgp-usubo.ll?rev=365711&r1=365710&r2=365711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/cgp-usubo.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/cgp-usubo.ll Wed Jul 10 16:23:54 2019
@@ -21,11 +21,9 @@ define i1 @usubo_ult_i64(i64 %x, i64 %y,
define i1 @usubo_ugt_i32(i32 %x, i32 %y, i32* %p) nounwind {
; CHECK-LABEL: usubo_ugt_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w1, w0
-; CHECK-NEXT: cset w8, hi
-; CHECK-NEXT: sub w9, w0, w1
-; CHECK-NEXT: mov w0, w8
-; CHECK-NEXT: str w9, [x2]
+; CHECK-NEXT: subs w8, w0, w1
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: ret
%ov = icmp ugt i32 %y, %x
%s = sub i32 %x, %y
Modified: llvm/trunk/test/CodeGen/Lanai/sub-cmp-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Lanai/sub-cmp-peephole.ll?rev=365711&r1=365710&r2=365711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Lanai/sub-cmp-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/Lanai/sub-cmp-peephole.ll Wed Jul 10 16:23:54 2019
@@ -25,7 +25,7 @@ define i32 @g(i32 inreg %a, i32 inreg %b
; CHECK-NEXT: add %sp, 0x8, %fp
; CHECK-NEXT: sub %sp, 0x8, %sp
; CHECK-NEXT: sub.f %r7, %r6, %r3
-; CHECK-NEXT: sel.lt %r3, %r0, %rv
+; CHECK-NEXT: sel.gt %r3, %r0, %rv
; CHECK-NEXT: ld -4[%fp], %pc ! return
; CHECK-NEXT: add %fp, 0x0, %sp
; CHECK-NEXT: ld -8[%fp], %fp
@@ -59,7 +59,7 @@ define i32 @i(i32 inreg %a, i32 inreg %b
; CHECK-NEXT: add %sp, 0x8, %fp
; CHECK-NEXT: sub %sp, 0x8, %sp
; CHECK-NEXT: sub.f %r7, %r6, %r3
-; CHECK-NEXT: sel.ult %r3, %r0, %rv
+; CHECK-NEXT: sel.ugt %r3, %r0, %rv
; CHECK-NEXT: ld -4[%fp], %pc ! return
; CHECK-NEXT: add %fp, 0x0, %sp
; CHECK-NEXT: ld -8[%fp], %fp
@@ -75,11 +75,11 @@ define i32 @j(i32 inreg %a, i32 inreg %b
; CHECK: ! %bb.0: ! %entry
; CHECK-NEXT: st %fp, [--%sp]
; CHECK-NEXT: add %sp, 0x8, %fp
-; CHECK-NEXT: sub %sp, 0x8, %sp
-; CHECK-NEXT: sub.f %r7, %r6, %r0
+; CHECK-NEXT: sub.f %r6, %r7, %rv
; CHECK-NEXT: bne .LBB4_2
-; CHECK-NEXT: sub %r6, %r7, %rv
+; CHECK-NEXT: sub %sp, 0x8, %sp
; CHECK-NEXT: .LBB4_1: ! %if.then
+; CHECK-NEXT: sub.f %r7, %r6, %r0
; CHECK-NEXT: sel.gt %rv, %r6, %rv
; CHECK-NEXT: .LBB4_2: ! %if.else
; CHECK-NEXT: ld -4[%fp], %pc ! return
Modified: llvm/trunk/test/CodeGen/X86/jump_sign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/jump_sign.ll?rev=365711&r1=365710&r2=365711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/jump_sign.ll (original)
+++ llvm/trunk/test/CodeGen/X86/jump_sign.ll Wed Jul 10 16:23:54 2019
@@ -48,11 +48,10 @@ define i32 @func_g(i32 %a, i32 %b) nounw
define i32 @func_h(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_h:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: subl %ecx, %eax
-; CHECK-NEXT: cmovlel %edx, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cmovlel %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp slt i32 %b, %a
%sub = sub nsw i32 %a, %b
@@ -91,11 +90,10 @@ define i32 @func_j(i32 %a, i32 %b) nounw
define i32 @func_k(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_k:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: subl %ecx, %eax
-; CHECK-NEXT: cmovbel %edx, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cmovbel %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp ult i32 %b, %a
%sub = sub i32 %a, %b
@@ -108,10 +106,9 @@ define i32 @func_l(i32 %a, i32 %b) nounw
; CHECK-LABEL: func_l:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: subl %ecx, %eax
-; CHECK-NEXT: cmovlel %edx, %eax
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cmovlel %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp slt i32 %b, %a
%sub = sub nsw i32 %a, %b
@@ -139,16 +136,14 @@ define i32 @func_l2(i32 %a, i32 %b) noun
; CHECK-LABEL: func_l2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: subl %edx, %ecx
-; CHECK-NEXT: cmpl %eax, %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: subl %edx, %eax
; CHECK-NEXT: jne .LBB8_2
; CHECK-NEXT: # %bb.1: # %if.then
-; CHECK-NEXT: cmovgl %ecx, %eax
-; CHECK-NEXT: retl
+; CHECK-NEXT: cmpl %ecx, %edx
+; CHECK-NEXT: cmovlel %ecx, %eax
; CHECK-NEXT: .LBB8_2: # %if.else
-; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp eq i32 %b, %a
%sub = sub nsw i32 %a, %b
@@ -166,9 +161,8 @@ if.else:
define i32 @func_l3(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_l3:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: subl %ecx, %eax
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: jge .LBB9_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: retl
@@ -192,11 +186,10 @@ if.else:
define i32 @func_l4(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: func_l4:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: subl %ecx, %eax
-; CHECK-NEXT: cmovll %edx, %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: cmovll %ecx, %eax
; CHECK-NEXT: retl
%cmp = icmp sgt i32 %b, %a
%sub = sub i32 %a, %b
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=365711&r1=365710&r2=365711&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Wed Jul 10 16:23:54 2019
@@ -1038,14 +1038,14 @@ define <8 x i16> @test16(<8 x i16> %x, <
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa %xmm1, %xmm4
-; SSE41-NEXT: pmaxud %xmm0, %xmm4
-; SSE41-NEXT: pcmpeqd %xmm1, %xmm4
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pminud %xmm1, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
; SSE41-NEXT: pcmpeqd %xmm5, %xmm5
; SSE41-NEXT: pxor %xmm5, %xmm4
-; SSE41-NEXT: movdqa %xmm2, %xmm6
-; SSE41-NEXT: pmaxud %xmm3, %xmm6
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm6
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pminud %xmm2, %xmm6
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: packssdw %xmm6, %xmm4
; SSE41-NEXT: psubd %xmm2, %xmm3
@@ -1062,15 +1062,15 @@ define <8 x i16> @test16(<8 x i16> %x, <
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpmaxud %xmm0, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm5
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm2, %xmm4
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm0, %xmm5
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpandn %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpsubd %xmm4, %xmm2, %xmm1
-; AVX1-NEXT: vpandn %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpandn %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm1
+; AVX1-NEXT: vpandn %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
@@ -1081,8 +1081,8 @@ define <8 x i16> @test16(<8 x i16> %x, <
; AVX2-LABEL: test16:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpmaxud %ymm0, %ymm1, %ymm2
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpandn %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
@@ -1094,7 +1094,7 @@ define <8 x i16> @test16(<8 x i16> %x, <
; AVX512-LABEL: test16:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpcmpltud %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpcmpnleud %ymm1, %ymm0, %k1
; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vzeroupper