[llvm] 06fad8b - [DAGCombine] Add node in the worklist in topological order in CombineTo

Amaury Séchet via llvm-commits llvm-commits at lists.llvm.org
Sat May 7 09:24:35 PDT 2022


Author: Amaury Séchet
Date: 2022-05-07T16:24:31Z
New Revision: 06fad8bc05dcd0ecaf7d95f133a6344283d4f5ee

URL: https://github.com/llvm/llvm-project/commit/06fad8bc05dcd0ecaf7d95f133a6344283d4f5ee
DIFF: https://github.com/llvm/llvm-project/commit/06fad8bc05dcd0ecaf7d95f133a6344283d4f5ee.diff

LOG: [DAGCombine] Add node in the worklist in topological order in CombineTo

This is part of an ongoing effort toward making DAGCombine process the nodes in topological order.

This is able to discover a couple of new optimizations, but also causes a couple of regressions. I nevertheless chose to submit this patch for review so as to start the discussion with people working on the backend, so we can find a good way forward.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D124743

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
    llvm/test/CodeGen/AArch64/pr51476.ll
    llvm/test/CodeGen/AArch64/swifterror.ll
    llvm/test/CodeGen/ARM/and-load-combine.ll
    llvm/test/CodeGen/ARM/swifterror.ll
    llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
    llvm/test/CodeGen/X86/and-with-overflow.ll
    llvm/test/CodeGen/X86/movmsk-cmp.ll
    llvm/test/CodeGen/X86/or-with-overflow.ll
    llvm/test/CodeGen/X86/pr51175.ll
    llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
    llvm/test/CodeGen/X86/xor-with-overflow.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 66effdebc47d5..2bcd917d3304d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1136,10 +1136,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
   if (AddTo) {
     // Push the new nodes and any users onto the worklist
     for (unsigned i = 0, e = NumTo; i != e; ++i) {
-      if (To[i].getNode()) {
-        AddToWorklist(To[i].getNode());
-        AddUsersToWorklist(To[i].getNode());
-      }
+      if (To[i].getNode())
+        AddToWorklistWithUsers(To[i].getNode());
     }
   }
 

diff  --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 893fb8c2c1b16..b2ac6fa314351 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -14,14 +14,15 @@ define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7,
 ; CHECK-NEXT:    stp w6, w5, [sp, #36]
 ; CHECK-NEXT:    str w7, [sp, #32]
 ; CHECK-NEXT:    str w8, [x0]
-; CHECK-NEXT:    ldr w9, [sp, #72]
-; CHECK-NEXT:    ldr w8, [sp, #80]
-; CHECK-NEXT:    stp w8, w9, [sp, #16]
 ; CHECK-NEXT:    add x8, sp, #72
-; CHECK-NEXT:    add x8, x8, #24
+; CHECK-NEXT:    add x8, x8, #8
+; CHECK-NEXT:    ldr w9, [sp, #72]
+; CHECK-NEXT:    str w9, [sp, #20]
+; CHECK-NEXT:    ldr w9, [x8], #8
+; CHECK-NEXT:    str w9, [sp, #16]
+; CHECK-NEXT:    ldr w9, [x8], #8
 ; CHECK-NEXT:    str x8, [sp, #24]
-; CHECK-NEXT:    ldr w8, [sp, #88]
-; CHECK-NEXT:    str w8, [sp, #12]
+; CHECK-NEXT:    str w9, [sp, #12]
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
   %1 = alloca i32, align 4

diff  --git a/llvm/test/CodeGen/AArch64/pr51476.ll b/llvm/test/CodeGen/AArch64/pr51476.ll
index 6abd41a121546..b71aed5c322ae 100644
--- a/llvm/test/CodeGen/AArch64/pr51476.ll
+++ b/llvm/test/CodeGen/AArch64/pr51476.ll
@@ -5,12 +5,12 @@ define void @test(i8 %arg) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    and w9, w0, #0xff
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    cmp w9, #1
 ; CHECK-NEXT:    cset w0, ne
-; CHECK-NEXT:    cmp w0, #3
 ; CHECK-NEXT:    strb w0, [sp, #12]
-; CHECK-NEXT:    b.eq .LBB0_2
+; CHECK-NEXT:    cbz w8, .LBB0_2
 ; CHECK-NEXT:  // %bb.1: // %do_call
 ; CHECK-NEXT:    bl unknown
 ; CHECK-NEXT:  .LBB0_2: // %common.ret

diff  --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index c51db5f37aeb3..15dda462cbbd2 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -936,17 +936,18 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
 ; CHECK-APPLE-AARCH64-NEXT:    mov w0, #16
 ; CHECK-APPLE-AARCH64-NEXT:    bl _malloc
 ; CHECK-APPLE-AARCH64-NEXT:    mov w8, #1
-; CHECK-APPLE-AARCH64-NEXT:    ldr w9, [x29, #16]
-; CHECK-APPLE-AARCH64-NEXT:    add x10, x29, #16
-; CHECK-APPLE-AARCH64-NEXT:    ldr w11, [x29, #32]
+; CHECK-APPLE-AARCH64-NEXT:    add x9, x29, #16
+; CHECK-APPLE-AARCH64-NEXT:    ldr w10, [x29, #16]
+; CHECK-APPLE-AARCH64-NEXT:    orr x9, x9, #0x8
 ; CHECK-APPLE-AARCH64-NEXT:    strb w8, [x0, #8]
-; CHECK-APPLE-AARCH64-NEXT:    add x8, x10, #24
-; CHECK-APPLE-AARCH64-NEXT:    stur w9, [x29, #-12]
-; CHECK-APPLE-AARCH64-NEXT:    ldr w9, [x29, #24]
+; CHECK-APPLE-AARCH64-NEXT:    stur w10, [x29, #-12]
+; CHECK-APPLE-AARCH64-NEXT:    ldr w8, [x9], #8
+; CHECK-APPLE-AARCH64-NEXT:    str w8, [sp, #16]
+; CHECK-APPLE-AARCH64-NEXT:    ldr w8, [x9], #8
 ; CHECK-APPLE-AARCH64-NEXT:    fmov s0, #1.00000000
 ; CHECK-APPLE-AARCH64-NEXT:    mov x21, x0
-; CHECK-APPLE-AARCH64-NEXT:    stur x8, [x29, #-8]
-; CHECK-APPLE-AARCH64-NEXT:    stp w11, w9, [sp, #12]
+; CHECK-APPLE-AARCH64-NEXT:    stur x9, [x29, #-8]
+; CHECK-APPLE-AARCH64-NEXT:    str w8, [sp, #12]
 ; CHECK-APPLE-AARCH64-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
 ; CHECK-APPLE-AARCH64-NEXT:    add sp, sp, #48
 ; CHECK-APPLE-AARCH64-NEXT:    ret

diff  --git a/llvm/test/CodeGen/ARM/and-load-combine.ll b/llvm/test/CodeGen/ARM/and-load-combine.ll
index a281cff79ddca..41361947d5b01 100644
--- a/llvm/test/CodeGen/ARM/and-load-combine.ll
+++ b/llvm/test/CodeGen/ARM/and-load-combine.ll
@@ -414,35 +414,35 @@ entry:
 define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a, i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_short_int:
 ; ARM:       @ %bb.0: @ %entry
-; ARM-NEXT:    ldrb r1, [r1]
 ; ARM-NEXT:    ldrb r0, [r0]
-; ARM-NEXT:    and r0, r0, r1
+; ARM-NEXT:    ldr r1, [r1]
+; ARM-NEXT:    and r0, r1, r0
 ; ARM-NEXT:    clz r0, r0
 ; ARM-NEXT:    lsr r0, r0, #5
 ; ARM-NEXT:    bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_short_int:
 ; ARMEB:       @ %bb.0: @ %entry
-; ARMEB-NEXT:    ldrb r1, [r1, #3]
 ; ARMEB-NEXT:    ldrb r0, [r0, #1]
-; ARMEB-NEXT:    and r0, r0, r1
+; ARMEB-NEXT:    ldr r1, [r1]
+; ARMEB-NEXT:    and r0, r1, r0
 ; ARMEB-NEXT:    clz r0, r0
 ; ARMEB-NEXT:    lsr r0, r0, #5
 ; ARMEB-NEXT:    bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_short_int:
 ; THUMB1:       @ %bb.0: @ %entry
-; THUMB1-NEXT:    ldrb r1, [r1]
-; THUMB1-NEXT:    ldrb r2, [r0]
-; THUMB1-NEXT:    ands r2, r1
-; THUMB1-NEXT:    rsbs r0, r2, #0
-; THUMB1-NEXT:    adcs r0, r2
+; THUMB1-NEXT:    ldrb r0, [r0]
+; THUMB1-NEXT:    ldr r1, [r1]
+; THUMB1-NEXT:    ands r1, r0
+; THUMB1-NEXT:    rsbs r0, r1, #0
+; THUMB1-NEXT:    adcs r0, r1
 ; THUMB1-NEXT:    bx lr
 ;
 ; THUMB2-LABEL: cmp_and8_short_int:
 ; THUMB2:       @ %bb.0: @ %entry
-; THUMB2-NEXT:    ldrb r1, [r1]
 ; THUMB2-NEXT:    ldrb r0, [r0]
+; THUMB2-NEXT:    ldr r1, [r1]
 ; THUMB2-NEXT:    ands r0, r1
 ; THUMB2-NEXT:    clz r0, r0
 ; THUMB2-NEXT:    lsrs r0, r0, #5

diff  --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll
index 7cef2a321d2b0..1a330e7e5c77f 100644
--- a/llvm/test/CodeGen/ARM/swifterror.ll
+++ b/llvm/test/CodeGen/ARM/swifterror.ll
@@ -681,15 +681,16 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
 ; CHECK-APPLE-NEXT:    bl _malloc
 ; CHECK-APPLE-NEXT:    mov r8, r0
 ; CHECK-APPLE-NEXT:    mov r0, #1
-; CHECK-APPLE-NEXT:    add r3, r7, #8
 ; CHECK-APPLE-NEXT:    strb r0, [r8, #8]
 ; CHECK-APPLE-NEXT:    add r0, r7, #8
-; CHECK-APPLE-NEXT:    ldm r3, {r1, r2, r3}
-; CHECK-APPLE-NEXT:    add r0, r0, #12
+; CHECK-APPLE-NEXT:    add r0, r0, #4
+; CHECK-APPLE-NEXT:    ldr r2, [r7, #8]
+; CHECK-APPLE-NEXT:    ldr r1, [r0], #4
+; CHECK-APPLE-NEXT:    ldr r3, [r0], #4
 ; CHECK-APPLE-NEXT:    str r0, [sp, #16]
 ; CHECK-APPLE-NEXT:    mov r0, #1065353216
-; CHECK-APPLE-NEXT:    str r1, [sp, #12]
-; CHECK-APPLE-NEXT:    str r2, [sp, #8]
+; CHECK-APPLE-NEXT:    str r2, [sp, #12]
+; CHECK-APPLE-NEXT:    str r1, [sp, #8]
 ; CHECK-APPLE-NEXT:    str r3, [sp, #4]
 ; CHECK-APPLE-NEXT:    mov sp, r7
 ; CHECK-APPLE-NEXT:    pop {r7, lr}
@@ -752,15 +753,16 @@ define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) {
 ; CHECK-ANDROID-NEXT:    bl malloc
 ; CHECK-ANDROID-NEXT:    mov r8, r0
 ; CHECK-ANDROID-NEXT:    mov r0, #1
-; CHECK-ANDROID-NEXT:    add r3, sp, #32
 ; CHECK-ANDROID-NEXT:    strb r0, [r8, #8]
 ; CHECK-ANDROID-NEXT:    add r0, sp, #32
-; CHECK-ANDROID-NEXT:    ldm r3, {r1, r2, r3}
-; CHECK-ANDROID-NEXT:    add r0, r0, #12
+; CHECK-ANDROID-NEXT:    orr r0, r0, #4
+; CHECK-ANDROID-NEXT:    ldr r2, [sp, #32]
+; CHECK-ANDROID-NEXT:    ldr r1, [r0], #4
+; CHECK-ANDROID-NEXT:    ldr r3, [r0], #4
 ; CHECK-ANDROID-NEXT:    str r0, [sp, #16]
 ; CHECK-ANDROID-NEXT:    mov r0, #1065353216
-; CHECK-ANDROID-NEXT:    str r1, [sp, #12]
-; CHECK-ANDROID-NEXT:    str r2, [sp, #8]
+; CHECK-ANDROID-NEXT:    str r2, [sp, #12]
+; CHECK-ANDROID-NEXT:    str r1, [sp, #8]
 ; CHECK-ANDROID-NEXT:    str r3, [sp, #4]
 ; CHECK-ANDROID-NEXT:    add sp, sp, #24
 ; CHECK-ANDROID-NEXT:    pop {r11, lr}

diff  --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
index 990b692715abc..ee7730931bfb1 100644
--- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll
@@ -191,13 +191,13 @@ entry:
 define double @double_va_arg(double %a, ...) local_unnamed_addr  {
 ; ASM32-LABEL: double_va_arg:
 ; ASM32:       # %bb.0: # %entry
-; ASM32-NEXT:    stw 6, -12(1)
-; ASM32-NEXT:    addi 3, 1, 32
 ; ASM32-NEXT:    stw 5, -16(1)
+; ASM32-NEXT:    addi 3, 1, 32
+; ASM32-NEXT:    stw 6, -12(1)
 ; ASM32-NEXT:    lfd 0, -16(1)
-; ASM32-NEXT:    stw 6, -20(1)
-; ASM32-NEXT:    fadd 0, 0, 1
 ; ASM32-NEXT:    stw 5, -24(1)
+; ASM32-NEXT:    fadd 0, 0, 1
+; ASM32-NEXT:    stw 6, -20(1)
 ; ASM32-NEXT:    lfd 1, -24(1)
 ; ASM32-NEXT:    fadd 1, 1, 1
 ; ASM32-NEXT:    stw 7, 40(1)
@@ -273,22 +273,22 @@ define double @double_stack_va_arg(double %one, double %two, double %three, doub
 ; ASM32-LABEL: double_stack_va_arg:
 ; ASM32:       # %bb.0: # %entry
 ; ASM32-NEXT:    fadd 0, 1, 2
-; ASM32-NEXT:    addi 3, 1, 128
-; ASM32-NEXT:    lwz 4, 132(1)
+; ASM32-NEXT:    addi 4, 1, 128
+; ASM32-NEXT:    lwz 3, 132(1)
 ; ASM32-NEXT:    fadd 0, 0, 3
-; ASM32-NEXT:    stw 3, -4(1)
+; ASM32-NEXT:    stw 4, -4(1)
 ; ASM32-NEXT:    fadd 0, 0, 4
-; ASM32-NEXT:    lwz 3, 128(1)
+; ASM32-NEXT:    lwz 4, 128(1)
 ; ASM32-NEXT:    fadd 0, 0, 5
-; ASM32-NEXT:    stw 3, -16(1)
+; ASM32-NEXT:    stw 3, -12(1)
 ; ASM32-NEXT:    fadd 0, 0, 6
-; ASM32-NEXT:    stw 4, -12(1)
+; ASM32-NEXT:    stw 4, -16(1)
 ; ASM32-NEXT:    fadd 0, 0, 7
 ; ASM32-NEXT:    lfd 1, -16(1)
 ; ASM32-NEXT:    fadd 0, 0, 8
-; ASM32-NEXT:    stw 3, -24(1)
+; ASM32-NEXT:    stw 3, -20(1)
 ; ASM32-NEXT:    fadd 0, 0, 9
-; ASM32-NEXT:    stw 4, -20(1)
+; ASM32-NEXT:    stw 4, -24(1)
 ; ASM32-NEXT:    fadd 0, 0, 10
 ; ASM32-NEXT:    fadd 0, 0, 11
 ; ASM32-NEXT:    fadd 0, 0, 12
@@ -360,9 +360,9 @@ entry:
 
 ; 32BIT-LABEL:   body:             |
 ; 32BIT-DAG:     liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13
-; 32BIT-DAG:     renamable $r3 = ADDI %fixed-stack.0, 0
-; 32BIT-DAG:     STW killed renamable $r3, 0, %stack.0.arg1 :: (store (s32) into %ir.0)
-; 32BIT-DAG:     renamable $r3 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142, align 16)
+; 32BIT-DAG:     renamable $r4 = ADDI %fixed-stack.0, 0
+; 32BIT-DAG:     STW killed renamable $r4, 0, %stack.0.arg1 :: (store (s32) into %ir.0)
+; 32BIT-DAG:     renamable $r4 = LWZ 0, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142, align 16)
 ; 32BIT-DAG:     renamable $f1 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm
 ; 32BIT-DAG:     renamable $f0 = nofpexcept FADD killed renamable $f1, killed renamable $f2, implicit $rm
 ; 32BIT-DAG:     renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f3, implicit $rm
@@ -376,14 +376,13 @@ entry:
 ; 32BIT-DAG:     renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f11, implicit $rm
 ; 32BIT-DAG:     renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f12, implicit $rm
 ; 32BIT-DAG:     renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f13, implicit $rm
-; 32BIT-DAG:     renamable $r4 = LWZ 4, %fixed-stack.0 :: (load (s32) from %ir.argp.cur1 + 4)
-; 32BIT-DAG:     STW renamable $r4, 4, %stack.2 :: (store (s32) into %stack.2 + 4)
+; 32BIT-DAG:     renamable $r3 = LWZ 4, %fixed-stack.0 :: (load (s32) from %ir.argp.cur142 + 4)
+; 32BIT-DAG:     STW renamable $r3, 4, %stack.2 :: (store (s32) into %stack.2 + 4)
 ; 32BIT-DAG:     renamable $f1 = LFD 0, %stack.2 :: (load (s64) from %stack.2)
-; 32BIT-DAG:     STW killed renamable $r3, 0, %stack.3 :: (store (s32) into %stack.3, align 8)
-; 32BIT-DAG:     STW killed renamable $r4, 4, %stack.3 :: (store (s32) into %stack.3 + 4)
+; 32BIT-DAG:     STW killed renamable $r4, 0, %stack.3 :: (store (s32) into %stack.3, align 8)
+; 32BIT-DAG:     STW killed renamable $r3, 4, %stack.3 :: (store (s32) into %stack.3 + 4)
 ; 32BIT-DAG:     renamable $f2 = LFD 0, %stack.3 :: (load (s64) from %stack.3)
 ; 32BIT-DAG:     renamable $f0 = nofpexcept FADD killed renamable $f0, killed renamable $f1, implicit $rm
-; 32BIT-DAG:     STW renamable $r3, 0, %stack.2 :: (store (s32) into %stack.2, align 8)
+; 32BIT-DAG:     STW renamable $r4, 0, %stack.2 :: (store (s32) into %stack.2, align 8)
 ; 32BIT-DAG:     renamable $f1 = nofpexcept FADD killed renamable $f2, renamable $f2, implicit $rm
 ; 32BIT-DAG:     BLR implicit $lr, implicit $rm, implicit $f1
-

diff  --git a/llvm/test/CodeGen/X86/and-with-overflow.ll b/llvm/test/CodeGen/X86/and-with-overflow.ll
index 05b1db4cecd2c..aba73de1e7719 100644
--- a/llvm/test/CodeGen/X86/and-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/and-with-overflow.ll
@@ -60,10 +60,9 @@ define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) {
 define i16 @and_i16_ri(i16 zeroext %0, i16 zeroext %1) {
 ; X86-LABEL: and_i16_ri:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andl $-17, %ecx
-; X86-NEXT:    testw %cx, %cx
 ; X86-NEXT:    je .LBB2_2
 ; X86-NEXT:  # %bb.1:
 ; X86-NEXT:    movl %ecx, %eax

diff  --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 5a7f572cd265d..8ed51fde6cbde 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4545,21 +4545,25 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
 ; SSE-LABEL: PR39665_c_ray:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cmpltpd %xmm0, %xmm1
-; SSE-NEXT:    movmskpd %xmm1, %eax
-; SSE-NEXT:    cmpb $3, %al
-; SSE-NEXT:    movl $42, %ecx
-; SSE-NEXT:    movl $99, %eax
-; SSE-NEXT:    cmovel %ecx, %eax
+; SSE-NEXT:    movmskpd %xmm1, %ecx
+; SSE-NEXT:    testb $2, %cl
+; SSE-NEXT:    movl $42, %eax
+; SSE-NEXT:    movl $99, %edx
+; SSE-NEXT:    cmovel %edx, %eax
+; SSE-NEXT:    testb $1, %cl
+; SSE-NEXT:    cmovel %edx, %eax
 ; SSE-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: PR39665_c_ray:
 ; AVX1OR2:       # %bb.0:
 ; AVX1OR2-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1OR2-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT:    cmpb $3, %al
-; AVX1OR2-NEXT:    movl $42, %ecx
-; AVX1OR2-NEXT:    movl $99, %eax
-; AVX1OR2-NEXT:    cmovel %ecx, %eax
+; AVX1OR2-NEXT:    vmovmskpd %xmm0, %ecx
+; AVX1OR2-NEXT:    testb $2, %cl
+; AVX1OR2-NEXT:    movl $42, %eax
+; AVX1OR2-NEXT:    movl $99, %edx
+; AVX1OR2-NEXT:    cmovel %edx, %eax
+; AVX1OR2-NEXT:    testb $1, %cl
+; AVX1OR2-NEXT:    cmovel %edx, %eax
 ; AVX1OR2-NEXT:    retq
 ;
 ; KNL-LABEL: PR39665_c_ray:

diff  --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll
index 3e39ab65ad2ab..495da7e88b773 100644
--- a/llvm/test/CodeGen/X86/or-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/or-with-overflow.ll
@@ -62,7 +62,7 @@ define i16 @or_i16_ri(i16 zeroext %0, i16 zeroext %1) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    orl $-17, %ecx
+; X86-NEXT:    orl $65519, %ecx # imm = 0xFFEF
 ; X86-NEXT:    testw %cx, %cx
 ; X86-NEXT:    je .LBB2_2
 ; X86-NEXT:  # %bb.1:
@@ -74,8 +74,7 @@ define i16 @or_i16_ri(i16 zeroext %0, i16 zeroext %1) {
 ; X64-LABEL: or_i16_ri:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    orl $-17, %eax
-; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    orl $65519, %eax # imm = 0xFFEF
 ; X64-NEXT:    cmovel %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/pr51175.ll b/llvm/test/CodeGen/X86/pr51175.ll
index 26d7492d90719..3406602335f8b 100644
--- a/llvm/test/CodeGen/X86/pr51175.ll
+++ b/llvm/test/CodeGen/X86/pr51175.ll
@@ -11,11 +11,10 @@ define i32 @foo(i16 signext %0, i32 %1, i32* nocapture %2) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    andl $65527, %eax # imm = 0xFFF7
-; CHECK-NEXT:    movl %eax, (%rdx)
+; CHECK-NEXT:    andl $65527, %edi # imm = 0xFFF7
+; CHECK-NEXT:    movl %edi, (%rdx)
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $-9, %dil
+; CHECK-NEXT:    testb %dil, %dil
 ; CHECK-NEXT:    cmovel %esi, %eax
 ; CHECK-NEXT:    retq
   %4 = add i16 %0, 1

diff  --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
index b1a7a2485701a..0d4074df399f5 100644
--- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
+++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
@@ -58,10 +58,10 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) {
 ; CHECK-NEXT:    cmovll %ecx, %edx
 ; CHECK-NEXT:    pextrw $1, %xmm0, %esi
 ; CHECK-NEXT:    movswl %si, %edi
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrl $16, %eax
-; CHECK-NEXT:    leal (%rdi,%rdi), %esi
-; CHECK-NEXT:    shrdw $15, %ax, %si
+; CHECK-NEXT:    leal (%rdi,%rdi), %eax
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    shrl $16, %esi
+; CHECK-NEXT:    shldw $1, %ax, %si
 ; CHECK-NEXT:    sarl $15, %edi
 ; CHECK-NEXT:    cmpl $16384, %edi # imm = 0x4000
 ; CHECK-NEXT:    cmovgel %r8d, %esi
@@ -83,10 +83,10 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) {
 ; CHECK-NEXT:    pinsrw $2, %edx, %xmm1
 ; CHECK-NEXT:    pextrw $3, %xmm0, %eax
 ; CHECK-NEXT:    cwtl
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    shrl $14, %edx
-; CHECK-NEXT:    leal (,%rax,4), %esi
-; CHECK-NEXT:    shrdw $15, %dx, %si
+; CHECK-NEXT:    leal (,%rax,4), %edx
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    shrl $16, %esi
+; CHECK-NEXT:    shldw $1, %dx, %si
 ; CHECK-NEXT:    sarl $14, %eax
 ; CHECK-NEXT:    cmpl $16384, %eax # imm = 0x4000
 ; CHECK-NEXT:    cmovgel %r8d, %esi

diff  --git a/llvm/test/CodeGen/X86/xor-with-overflow.ll b/llvm/test/CodeGen/X86/xor-with-overflow.ll
index ad2da087929ec..96533a7798bd2 100644
--- a/llvm/test/CodeGen/X86/xor-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/xor-with-overflow.ll
@@ -62,7 +62,7 @@ define i16 @xor_i16_ri(i16 zeroext %0, i16 zeroext %1) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    xorl $-17, %ecx
+; X86-NEXT:    xorl $65519, %ecx # imm = 0xFFEF
 ; X86-NEXT:    testw %cx, %cx
 ; X86-NEXT:    je .LBB2_2
 ; X86-NEXT:  # %bb.1:
@@ -74,8 +74,7 @@ define i16 @xor_i16_ri(i16 zeroext %0, i16 zeroext %1) {
 ; X64-LABEL: xor_i16_ri:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    xorl $-17, %eax
-; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    xorl $65519, %eax # imm = 0xFFEF
 ; X64-NEXT:    cmovel %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq


        


More information about the llvm-commits mailing list