[llvm] 6c56cf7 - [DAG] FoldSetCC - add missing icmp(X,undef) -> isTrueWhenEqual case
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 13 03:04:46 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-13T11:01:58+01:00
New Revision: 6c56cf71ee82ec3a28e0dfc2b751bd10c16929da
URL: https://github.com/llvm/llvm-project/commit/6c56cf71ee82ec3a28e0dfc2b751bd10c16929da
DIFF: https://github.com/llvm/llvm-project/commit/6c56cf71ee82ec3a28e0dfc2b751bd10c16929da.diff
LOG: [DAG] FoldSetCC - add missing icmp(X,undef) -> isTrueWhenEqual case
Follow-up to D59363, which failed to handle the icmp(X,undef) -> isTrueWhenEqual case - similar to llvm::ConstantFoldCompareInstruction.
As discussed on the review, this is affecting some previously reduced test cases, but will also prevent reductions from relying on this inconsistent behaviour in the future.
Differential Revision: https://reviews.llvm.org/D158068
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/AArch64/pr55178.ll
llvm/test/CodeGen/AMDGPU/swdev373493.ll
llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
llvm/test/CodeGen/PowerPC/pr45709.ll
llvm/test/CodeGen/RISCV/pr64503.ll
llvm/test/CodeGen/Thumb2/ldr-str-imm12.ll
llvm/test/CodeGen/WebAssembly/pr59626.ll
llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
llvm/test/CodeGen/X86/twoaddr-lea.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b2ba747ce209867..8d6d328e8a1f1f6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2519,7 +2519,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
- if (N1 == N2)
+ if (N1.isUndef() || N2.isUndef() || N1 == N2)
return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
}
diff --git a/llvm/test/CodeGen/AArch64/pr55178.ll b/llvm/test/CodeGen/AArch64/pr55178.ll
index d814d8e66499949..a4d7c72815c3a47 100644
--- a/llvm/test/CodeGen/AArch64/pr55178.ll
+++ b/llvm/test/CodeGen/AArch64/pr55178.ll
@@ -7,12 +7,7 @@
define i1 @test14(i8 %X) {
; CHECK-LABEL: test14:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-113
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: lsl w8, w8, w0
-; CHECK-NEXT: sxtb w8, w8
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
%1 = shl i8 -113, %X
%cmp = icmp slt i8 undef, %1
diff --git a/llvm/test/CodeGen/AMDGPU/swdev373493.ll b/llvm/test/CodeGen/AMDGPU/swdev373493.ll
index dea192aad80a7e5..4d1d88d643f1512 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev373493.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev373493.ll
@@ -17,8 +17,7 @@ define hidden fastcc void @bar(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %a
; CHECK-NEXT: v_mov_b32_e32 v8, v5
; CHECK-NEXT: v_mov_b32_e32 v7, v4
; CHECK-NEXT: v_mov_b32_e32 v6, v3
-; CHECK-NEXT: s_cmp_lt_i32 s4, 3
-; CHECK-NEXT: s_cbranch_scc0 .LBB0_3
+; CHECK-NEXT: s_branch .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %LeafBlock
; CHECK-NEXT: s_cbranch_scc1 .LBB0_5
; CHECK-NEXT: ; %bb.2: ; %bb7
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
index 4bea1d81a9de0a4..eeadb73b9db2cff 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -33,6 +33,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-NEXT: lwz r3, 0(r3)
; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill
+; CHECK-NEXT: crxor 4*cr2+eq, 4*cr2+eq, 4*cr2+eq
; CHECK-NEXT: paddi r29, 0, .LJTI0_0@PCREL, 1
; CHECK-NEXT: srwi r4, r3, 4
; CHECK-NEXT: srwi r3, r3, 5
@@ -40,15 +41,14 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-NEXT: li r4, 0
; CHECK-NEXT: crmove 4*cr2+gt, gt
; CHECK-NEXT: andi. r3, r3, 1
-; CHECK-NEXT: crmove 4*cr2+lt, gt
-; CHECK-NEXT: cmplwi cr3, r3, 336
; CHECK-NEXT: li r3, 0
+; CHECK-NEXT: crmove 4*cr2+lt, gt
; CHECK-NEXT: sldi r30, r3, 2
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1: # %bb43
; CHECK-NEXT: #
; CHECK-NEXT: bl call_1@notoc
-; CHECK-NEXT: setnbc r3, 4*cr4+eq
+; CHECK-NEXT: setnbc r3, 4*cr3+eq
; CHECK-NEXT: li r4, 0
; CHECK-NEXT: stb r4, 0(r3)
; CHECK-NEXT: li r4, 0
@@ -58,13 +58,13 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-NEXT: bc 12, 4*cr2+gt, .LBB0_31
; CHECK-NEXT: # %bb.3: # %bb10
; CHECK-NEXT: #
-; CHECK-NEXT: bgt cr3, .LBB0_5
+; CHECK-NEXT: bc 12, 4*cr2+eq, .LBB0_5
; CHECK-NEXT: # %bb.4: # %bb10
; CHECK-NEXT: #
; CHECK-NEXT: mr r3, r4
; CHECK-NEXT: lwz r5, 0(r3)
; CHECK-NEXT: rlwinm r4, r5, 0, 21, 22
-; CHECK-NEXT: cmpwi cr4, r4, 512
+; CHECK-NEXT: cmpwi cr3, r4, 512
; CHECK-NEXT: lwax r4, r30, r29
; CHECK-NEXT: add r4, r4, r29
; CHECK-NEXT: mtctr r4
@@ -186,7 +186,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-NEXT: mtocrf 8, r12
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_32: # %bb29
-; CHECK-NEXT: crmove eq, 4*cr4+eq
+; CHECK-NEXT: crmove eq, 4*cr3+eq
; CHECK-NEXT: cmpwi cr3, r5, 366
; CHECK-NEXT: cmpwi cr4, r3, 0
; CHECK-NEXT: li r29, 0
@@ -224,15 +224,15 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-BE-NEXT: lwz r3, 0(r3)
; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: crxor 4*cr2+eq, 4*cr2+eq, 4*cr2+eq
; CHECK-BE-NEXT: srwi r4, r3, 4
; CHECK-BE-NEXT: srwi r3, r3, 5
; CHECK-BE-NEXT: andi. r4, r4, 1
; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: crmove 4*cr2+gt, gt
; CHECK-BE-NEXT: andi. r3, r3, 1
-; CHECK-BE-NEXT: crmove 4*cr2+lt, gt
-; CHECK-BE-NEXT: cmplwi cr3, r3, 336
; CHECK-BE-NEXT: li r3, 0
+; CHECK-BE-NEXT: crmove 4*cr2+lt, gt
; CHECK-BE-NEXT: sldi r30, r3, 2
; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-BE-NEXT: ld r29, .LC0@toc@l(r3)
@@ -241,7 +241,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: bl call_1
; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: setnbc r3, 4*cr4+eq
+; CHECK-BE-NEXT: setnbc r3, 4*cr3+eq
; CHECK-BE-NEXT: li r4, 0
; CHECK-BE-NEXT: stb r4, 0(r3)
; CHECK-BE-NEXT: li r4, 0
@@ -251,13 +251,13 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-BE-NEXT: bc 12, 4*cr2+gt, .LBB0_31
; CHECK-BE-NEXT: # %bb.3: # %bb10
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: bgt cr3, .LBB0_5
+; CHECK-BE-NEXT: bc 12, 4*cr2+eq, .LBB0_5
; CHECK-BE-NEXT: # %bb.4: # %bb10
; CHECK-BE-NEXT: #
; CHECK-BE-NEXT: mr r3, r4
; CHECK-BE-NEXT: lwz r5, 0(r3)
; CHECK-BE-NEXT: rlwinm r4, r5, 0, 21, 22
-; CHECK-BE-NEXT: cmpwi cr4, r4, 512
+; CHECK-BE-NEXT: cmpwi cr3, r4, 512
; CHECK-BE-NEXT: lwax r4, r30, r29
; CHECK-BE-NEXT: add r4, r4, r29
; CHECK-BE-NEXT: mtctr r4
@@ -379,7 +379,7 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
; CHECK-BE-NEXT: mtocrf 8, r12
; CHECK-BE-NEXT: blr
; CHECK-BE-NEXT: .LBB0_32: # %bb29
-; CHECK-BE-NEXT: crmove eq, 4*cr4+eq
+; CHECK-BE-NEXT: crmove eq, 4*cr3+eq
; CHECK-BE-NEXT: cmpwi cr3, r5, 366
; CHECK-BE-NEXT: cmpwi cr4, r3, 0
; CHECK-BE-NEXT: li r29, 0
diff --git a/llvm/test/CodeGen/PowerPC/pr45709.ll b/llvm/test/CodeGen/PowerPC/pr45709.ll
index 5448418aaf3c8b0..9886fc44295fdfc 100644
--- a/llvm/test/CodeGen/PowerPC/pr45709.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45709.ll
@@ -10,38 +10,14 @@
define dso_local void @_ZN1a1bEv(<4 x float> %in) local_unnamed_addr #0 align 2 {
; CHECK-LABEL: _ZN1a1bEv:
; CHECK: # %bb.0:
-; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_6
+; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_4
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_1: # %.preheader
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-NEXT: lvx v3, 0, r3
-; CHECK-NEXT: vperm v2, v2, v2, v3
-; CHECK-NEXT: vxor v3, v3, v3
-; CHECK-NEXT: addi r3, r1, -48
-; CHECK-NEXT: stvx v3, 0, r3
-; CHECK-NEXT: addi r3, r1, -32
-; CHECK-NEXT: stvx v2, 0, r3
-; CHECK-NEXT: lwz r3, -48(r1)
-; CHECK-NEXT: lwz r4, -32(r1)
-; CHECK-NEXT: cmpw r4, r3
-; CHECK-NEXT: bc 12, gt, .LBB0_4
-; CHECK-NEXT: b .LBB0_5
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: addi r3, r4, 0
-; CHECK-NEXT: .LBB0_5:
-; CHECK-NEXT: cmpw r3, r3
-; CHECK-NEXT: stw r3, -64(r1)
-; CHECK-NEXT: addi r3, r1, -64
-; CHECK-NEXT: lvx v2, 0, r3
-; CHECK-NEXT: addi r3, r1, -16
-; CHECK-NEXT: stvx v2, 0, r3
-; CHECK-NEXT: lfs f0, -16(r1)
-; CHECK-NEXT: .LBB0_6:
; CHECK-NEXT: blr
br i1 undef, label %7, label %1
diff --git a/llvm/test/CodeGen/RISCV/pr64503.ll b/llvm/test/CodeGen/RISCV/pr64503.ll
index 63a1c03a904d8e1..3a4b2cf828c6b04 100644
--- a/llvm/test/CodeGen/RISCV/pr64503.ll
+++ b/llvm/test/CodeGen/RISCV/pr64503.ll
@@ -4,8 +4,7 @@
define i1 @f(i64 %LGV1) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
-; CHECK-NEXT: sltu a0, a0, a1
-; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: li a0, 1
; CHECK-NEXT: ret
%B1 = xor i64 %LGV1, %LGV1
%B2 = srem i64 1, %B1
@@ -17,8 +16,7 @@ define i1 @f(i64 %LGV1) {
define i64 @g(ptr %A, i64 %0) {
; CHECK-LABEL: g:
; CHECK: # %bb.0:
-; CHECK-NEXT: slt a0, a0, a2
-; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: li a0, 1
; CHECK-NEXT: sb a0, 0(zero)
; CHECK-NEXT: ret
store i64 poison, ptr %A, align 4
diff --git a/llvm/test/CodeGen/Thumb2/ldr-str-imm12.ll b/llvm/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 4ac7b511db89f0c..1d177b0a4ebbf46 100644
--- a/llvm/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/llvm/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -38,11 +38,11 @@ define ptr @Manifest(ptr %x, ptr %env, ptr %style, ptr %bthr, ptr %fthr, ptr %ta
; CHECK-NEXT: popne.w {r8, r10, r11}
; CHECK-NEXT: popne {r4, r5, r6, r7, pc}
; CHECK-NEXT: LBB0_1: @ %bb20
-; CHECK-NEXT: cmp.w r0, #450
-; CHECK-NEXT: bge LBB0_4
+; CHECK-NEXT: movs r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: bne LBB0_4
; CHECK-NEXT: @ %bb.2: @ %bb20
-; CHECK-NEXT: cmp r0, #209
-; CHECK-NEXT: ble LBB0_5
+; CHECK-NEXT: beq LBB0_5
; CHECK-NEXT: @ %bb.3: @ %bb420
; CHECK-NEXT: movw r5, :lower16:(L_zz_hold$non_lazy_ptr-(LPC0_0+4))
; CHECK-NEXT: movt r5, :upper16:(L_zz_hold$non_lazy_ptr-(LPC0_0+4))
@@ -72,10 +72,9 @@ define ptr @Manifest(ptr %x, ptr %env, ptr %style, ptr %bthr, ptr %fthr, ptr %ta
; CHECK-NEXT: bl _Manifest
; CHECK-NEXT: trap
; CHECK-NEXT: LBB0_4: @ %bb20
-; CHECK-NEXT: cmp.w r0, #560
-; CHECK-NEXT: itt ge
-; CHECK-NEXT: movge r0, #0
-; CHECK-NEXT: cmpge r0, #0
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: movne r0, #0
+; CHECK-NEXT: cmpne r0, #0
; CHECK-NEXT: LBB0_5: @ %bb20
; CHECK-NEXT: trap
entry:
diff --git a/llvm/test/CodeGen/WebAssembly/pr59626.ll b/llvm/test/CodeGen/WebAssembly/pr59626.ll
index 1a93f56a32d92f6..6ffdead28129ef7 100644
--- a/llvm/test/CodeGen/WebAssembly/pr59626.ll
+++ b/llvm/test/CodeGen/WebAssembly/pr59626.ll
@@ -13,12 +13,12 @@ define i8 @f(ptr %0, ptr %1) {
; CHECK-32-NEXT: i32.const 0
; CHECK-32-NEXT: i32.store16 0
; CHECK-32-NEXT: local.get 1
-; CHECK-32-NEXT: i32.const 0
+; CHECK-32-NEXT: i32.const 5
; CHECK-32-NEXT: i32.store8 2
; CHECK-32-NEXT: local.get 1
-; CHECK-32-NEXT: i32.const 0
+; CHECK-32-NEXT: i32.const 769
; CHECK-32-NEXT: i32.store16 0
-; CHECK-32-NEXT: i32.const 0
+; CHECK-32-NEXT: i32.const 1
; CHECK-32-NEXT: # fallthrough-return
;
; CHECK-64-LABEL: f:
@@ -31,9 +31,12 @@ define i8 @f(ptr %0, ptr %1) {
; CHECK-64-NEXT: i32.const 0
; CHECK-64-NEXT: i32.store16 0
; CHECK-64-NEXT: local.get 1
-; CHECK-64-NEXT: i32.const 0
+; CHECK-64-NEXT: i32.const 5
+; CHECK-64-NEXT: i32.store8 2
+; CHECK-64-NEXT: local.get 1
+; CHECK-64-NEXT: i32.const 769
; CHECK-64-NEXT: i32.store16 0
-; CHECK-64-NEXT: i32.const 0
+; CHECK-64-NEXT: i32.const 1
; CHECK-64-NEXT: # fallthrough-return
BB:
store <3 x i8> zeroinitializer, ptr %0
diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 8597de4493f4a4f..1962ddebc2115ef 100644
--- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -34,7 +34,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: .cfi_offset %edi, -16
; CHECK-NEXT: .cfi_offset %ebx, -12
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: Ltmp0:
; CHECK-NEXT: ## implicit-def: $ebx
@@ -46,14 +46,16 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: ## %bb.7: ## %bb31
-; CHECK-NEXT: ## implicit-def: $edi
+; CHECK-NEXT: ## implicit-def: $eax
+; CHECK-NEXT: ## kill: killed $eax
; CHECK-NEXT: LBB0_8: ## %bb38
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_13 Depth 2
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
; CHECK-NEXT: ## Child Loop BB0_21 Depth 2
-; CHECK-NEXT: cmpl %eax, %edi
-; CHECK-NEXT: jle LBB0_9
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne LBB0_9
; CHECK-NEXT: ## %bb.10: ## %bb41
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: Ltmp2:
@@ -76,18 +78,17 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Loop Header: Depth=2
; CHECK-NEXT: ## Child Loop BB0_16 Depth 3
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: jns LBB0_19
-; CHECK-NEXT: ## %bb.14: ## %bb48
-; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: jne LBB0_19
+; CHECK-NEXT: ## %bb.14: ## %bb48
+; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
; CHECK-NEXT: jne LBB0_17
; CHECK-NEXT: ## %bb.15: ## %bb49.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=2
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl %esi, %edx
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: LBB0_16: ## %bb49
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=2
@@ -112,7 +113,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: ## %bb.20: ## %bb61.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
+; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: LBB0_21: ## %bb61
; CHECK-NEXT: ## Parent Loop BB0_8 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
@@ -122,7 +123,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
; CHECK-NEXT: jne LBB0_21
; CHECK-NEXT: LBB0_22: ## %bb67
; CHECK-NEXT: ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT: decl %edi
+; CHECK-NEXT: decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: jmp LBB0_8
; CHECK-NEXT: LBB0_18: ## %bb43
; CHECK-NEXT: Ltmp5:
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index fd9251323ca1e2d..7217245cc8ac30b 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -66,7 +66,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: je LBB0_54
; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720
; CHECK-NEXT: movq %rdx, %r14
-; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: cmpq %rax, %rcx
@@ -78,9 +78,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_8: ## %while.body.preheader
; CHECK-NEXT: imulq $1040, %r14, %rax ## imm = 0x410
; CHECK-NEXT: movq _syBuf@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: leaq 8(%rcx,%rax), %rax
-; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
-; CHECK-NEXT: movl $1, %r15d
+; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx
+; CHECK-NEXT: movl $1, %r13d
; CHECK-NEXT: movq _syCTRO@GOTPCREL(%rip), %rax
; CHECK-NEXT: movb $1, %cl
; CHECK-NEXT: .p2align 4, 0x90
@@ -90,32 +89,35 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: jne LBB0_9
; CHECK-NEXT: ## %bb.10: ## %do.end
-; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: xorl %ebp, %ebp
+; CHECK-NEXT: testb %bpl, %bpl
; CHECK-NEXT: jne LBB0_11
; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
-; CHECK-NEXT: xorl %r13d, %r13d
+; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx
; CHECK-NEXT: leaq LJTI0_1(%rip), %r14
+; CHECK-NEXT: movb $1, %sil
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT: xorl %r12d, %r12d
+; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: jmp LBB0_13
; CHECK-NEXT: LBB0_43: ## %while.cond1037.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %r13b, %r13b
+; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: je LBB0_54
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_20: ## %while.cond197.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: decl %r15d
-; CHECK-NEXT: testl %r15d, %r15d
-; CHECK-NEXT: movl %ebx, %r12d
+; CHECK-NEXT: decl %r13d
+; CHECK-NEXT: testl %r13d, %r13d
+; CHECK-NEXT: movl %ebp, %r15d
; CHECK-NEXT: jle LBB0_21
; CHECK-NEXT: LBB0_13: ## %while.body200
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB0_28 Depth 2
; CHECK-NEXT: ## Child Loop BB0_37 Depth 2
-; CHECK-NEXT: leal -268(%rbx), %eax
+; CHECK-NEXT: leal -268(%rbp), %eax
; CHECK-NEXT: cmpl $105, %eax
; CHECK-NEXT: ja LBB0_14
; CHECK-NEXT: ## %bb.55: ## %while.body200
@@ -125,13 +127,13 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: LBB0_25: ## %sw.bb474
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %r13b, %r13b
-; CHECK-NEXT: ## implicit-def: $rbp
+; CHECK-NEXT: testb %r12b, %r12b
+; CHECK-NEXT: ## implicit-def: $rbx
; CHECK-NEXT: jne LBB0_33
; CHECK-NEXT: ## %bb.26: ## %do.body479.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %r13b, %r13b
-; CHECK-NEXT: ## implicit-def: $rbp
+; CHECK-NEXT: testb %r12b, %r12b
+; CHECK-NEXT: ## implicit-def: $rbx
; CHECK-NEXT: jne LBB0_33
; CHECK-NEXT: ## %bb.27: ## %land.rhs485.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -140,30 +142,31 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_31: ## %do.body479.backedge
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
-; CHECK-NEXT: leaq 1(%rbp), %rax
-; CHECK-NEXT: testb %r13b, %r13b
+; CHECK-NEXT: leaq 1(%rbx), %rax
+; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: je LBB0_32
; CHECK-NEXT: LBB0_28: ## %land.rhs485
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: js LBB0_54
+; CHECK-NEXT: testb %sil, %sil
+; CHECK-NEXT: jne LBB0_54
; CHECK-NEXT: ## %bb.29: ## %cond.true.i.i2780
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
-; CHECK-NEXT: movq %rax, %rbp
-; CHECK-NEXT: testb %r13b, %r13b
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: jne LBB0_31
; CHECK-NEXT: ## %bb.30: ## %lor.rhs500
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
; CHECK-NEXT: movl $256, %esi ## imm = 0x100
; CHECK-NEXT: callq ___maskrune
-; CHECK-NEXT: testb %r13b, %r13b
+; CHECK-NEXT: movb $1, %sil
+; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: jne LBB0_31
; CHECK-NEXT: jmp LBB0_33
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_14: ## %while.body200
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal 1(%rbx), %eax
+; CHECK-NEXT: leal 1(%rbp), %eax
; CHECK-NEXT: cmpl $21, %eax
; CHECK-NEXT: ja LBB0_20
; CHECK-NEXT: ## %bb.15: ## %while.body200
@@ -173,7 +176,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jmpq *%rax
; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $1, %ebx
+; CHECK-NEXT: movl $1, %ebp
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_44: ## %sw.bb1134
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -183,22 +186,22 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jb LBB0_54
; CHECK-NEXT: ## %bb.45: ## in Loop: Header=BB0_13 Depth=1
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
-; CHECK-NEXT: movl $268, %ebx ## imm = 0x10C
+; CHECK-NEXT: movl $268, %ebp ## imm = 0x10C
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_39: ## %sw.bb566
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $20, %ebx
+; CHECK-NEXT: movl $20, %ebp
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_19: ## %sw.bb243
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: movl $2, %ebx
+; CHECK-NEXT: movl $2, %ebp
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: LBB0_32: ## %if.end517.loopexitsplit
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: incq %rbp
+; CHECK-NEXT: incq %rbx
; CHECK-NEXT: LBB0_33: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: leal -324(%r12), %eax
+; CHECK-NEXT: leal -324(%r15), %eax
; CHECK-NEXT: cmpl $59, %eax
; CHECK-NEXT: ja LBB0_34
; CHECK-NEXT: ## %bb.56: ## %if.end517
@@ -208,11 +211,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jb LBB0_37
; CHECK-NEXT: LBB0_34: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $11, %r12d
+; CHECK-NEXT: cmpl $11, %r15d
; CHECK-NEXT: je LBB0_37
; CHECK-NEXT: ## %bb.35: ## %if.end517
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmpl $24, %r12d
+; CHECK-NEXT: cmpl $24, %r15d
; CHECK-NEXT: je LBB0_37
; CHECK-NEXT: ## %bb.36: ## %if.then532
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
@@ -222,12 +225,12 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_37: ## %for.cond534
; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT: testb %r13b, %r13b
+; CHECK-NEXT: testb %r12b, %r12b
; CHECK-NEXT: jne LBB0_37
; CHECK-NEXT: ## %bb.38: ## %for.cond542.preheader
; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: testb %r13b, %r13b
-; CHECK-NEXT: movb $0, (%rbp)
+; CHECK-NEXT: testb %r12b, %r12b
+; CHECK-NEXT: movb $0, (%rbx)
; CHECK-NEXT: leaq LJTI0_0(%rip), %rdx
; CHECK-NEXT: jmp LBB0_20
; CHECK-NEXT: .p2align 4, 0x90
@@ -245,12 +248,12 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: LBB0_11:
; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: LBB0_21: ## %while.end1465
-; CHECK-NEXT: incl %ebx
-; CHECK-NEXT: cmpl $16, %ebx
+; CHECK-NEXT: incl %ebp
+; CHECK-NEXT: cmpl $16, %ebp
; CHECK-NEXT: ja LBB0_49
; CHECK-NEXT: ## %bb.22: ## %while.end1465
; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801
-; CHECK-NEXT: btl %ebx, %eax
+; CHECK-NEXT: btl %ebp, %eax
; CHECK-NEXT: jae LBB0_49
; CHECK-NEXT: ## %bb.23:
; CHECK-NEXT: xorl %ebx, %ebx
@@ -283,26 +286,26 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: jmp LBB0_40
; CHECK-NEXT: LBB0_49: ## %for.cond1480.preheader
-; CHECK-NEXT: movl $512, %eax ## imm = 0x200
-; CHECK-NEXT: cmpq %rax, %rax
-; CHECK-NEXT: jae LBB0_54
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je LBB0_54
; CHECK-NEXT: ## %bb.50: ## %for.body1664.lr.ph
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload
; CHECK-NEXT: jne LBB0_53
; CHECK-NEXT: ## %bb.51: ## %while.body1679.preheader
; CHECK-NEXT: incl %ebp
+; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_52: ## %while.body1679
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movq (%rbx), %rdi
+; CHECK-NEXT: movq (%r14), %rdi
; CHECK-NEXT: callq _fileno
-; CHECK-NEXT: movslq %ebp, %rax
-; CHECK-NEXT: leal 1(%rax), %ebp
-; CHECK-NEXT: cmpq %rax, %rax
-; CHECK-NEXT: jl LBB0_52
+; CHECK-NEXT: incl %ebp
+; CHECK-NEXT: testb %bl, %bl
+; CHECK-NEXT: jne LBB0_52
; CHECK-NEXT: LBB0_53: ## %while.cond1683.preheader
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 4a25b7486a1c625..937cc173e7faefd 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -148,8 +148,8 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: jne .LBB1_25
; CHECK-NEXT: # %bb.10: # %inner_loop_body
; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=2
-; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: jns .LBB1_9
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB1_9
; CHECK-NEXT: # %bb.11: # %if.end96.i
; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1
; CHECK-NEXT: cmpl $3, %r13d
diff --git a/llvm/test/CodeGen/X86/twoaddr-lea.ll b/llvm/test/CodeGen/X86/twoaddr-lea.ll
index b11c9de00886f26..2f6b943ca727534 100644
--- a/llvm/test/CodeGen/X86/twoaddr-lea.ll
+++ b/llvm/test/CodeGen/X86/twoaddr-lea.ll
@@ -69,8 +69,8 @@ define void @ham() {
; CHECK-NEXT: movq _global@GOTPCREL(%rip), %rdx
; CHECK-NEXT: movq _global2@GOTPCREL(%rip), %rsi
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpl $10, %eax
-; CHECK-NEXT: jle LBB3_2
+; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: je LBB3_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB3_6: ## %bb2
; CHECK-NEXT: ## =>This Loop Header: Depth=1
@@ -90,8 +90,8 @@ define void @ham() {
; CHECK-NEXT: ## %bb.8: ## %bb9
; CHECK-NEXT: ## in Loop: Header=BB3_6 Depth=1
; CHECK-NEXT: addq $4, %rax
-; CHECK-NEXT: cmpl $10, %eax
-; CHECK-NEXT: jg LBB3_6
+; CHECK-NEXT: testb %cl, %cl
+; CHECK-NEXT: jne LBB3_6
; CHECK-NEXT: LBB3_2: ## %bb3.preheader
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 8cf6045e1f22ce9..4a1bbb890fe716f 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -2141,19 +2141,21 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: psrlq $1, %xmm2
; SSE41-NEXT: por %xmm1, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
-; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: cvtsi2ss %rax, %xmm3
-; SSE41-NEXT: movq %xmm0, %rax
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
+; SSE41-NEXT: pextrq $1, %xmm1, %rax
; SSE41-NEXT: xorps %xmm2, %xmm2
; SSE41-NEXT: cvtsi2ss %rax, %xmm2
-; SSE41-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],zero,zero
-; SSE41-NEXT: movaps %xmm2, %xmm3
-; SSE41-NEXT: addps %xmm2, %xmm3
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2
-; SSE41-NEXT: movaps %xmm2, %xmm0
+; SSE41-NEXT: movq %xmm1, %rax
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: cvtsi2ss %rax, %xmm1
+; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
+; SSE41-NEXT: movaps %xmm1, %xmm2
+; SSE41-NEXT: addps %xmm1, %xmm2
+; SSE41-NEXT: xorps %xmm3, %xmm3
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[2,3]
+; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1
+; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2162,7 +2164,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2
; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
-; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
+; AVX1-NEXT: vmovaps %xmm0, %xmm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
@@ -2176,7 +2179,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
-; AVX1-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
More information about the llvm-commits
mailing list