[llvm] [DAG] Fix non-optimal codegen for `select i1 %0, float 1.0, float 0.0` (PR #107732)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 8 00:54:13 PDT 2024
https://github.com/c8ef updated https://github.com/llvm/llvm-project/pull/107732
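Editorial note for readers skimming the patch series: the change teaches the X86 backend to lower a floating-point select between the constants 1.0 and 0.0 into an integer-to-float conversion of the condition bit, which is why the updated tests show `andl $1` followed by `cvtsi2ss`/`cvtsi2sd` instead of constant-pool loads and mask/blend sequences. The sketch below is not part of the patch; it only illustrates the source-level equivalence the fold relies on (the function names are made up for the example).

    // Standalone illustration of the equivalence behind the fold (not LLVM code).
    #include <cassert>
    #include <initializer_list>

    // What the IR expresses: select i1 %c, float 1.0, float 0.0
    static float selectForm(bool c) { return c ? 1.0f : 0.0f; }

    // What the patch lowers it to: convert the 0/1 condition bit to float,
    // which on x86 becomes `andl $1, %reg` followed by `cvtsi2ss`.
    static float convertForm(bool c) { return static_cast<float>(c); }

    int main() {
      for (bool c : {false, true})
        assert(selectForm(c) == convertForm(c));
      return 0;
    }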
From 486ea64b373226f8e5ce31c18209f6dd88c77d6f Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Sun, 8 Sep 2024 09:52:37 +0800
Subject: [PATCH 1/2] fold
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++++++++++++++
llvm/test/CodeGen/X86/fp-select-cmp-and.ll | 19 ++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 839b87dd5d4dd8..e40e28bacd4f66 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45844,6 +45844,28 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineSelectOfTwoFPConstants(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+
+ auto *TrueC = dyn_cast<ConstantFPSDNode>(LHS);
+ auto *FalseC = dyn_cast<ConstantFPSDNode>(RHS);
+ if (!TrueC || !FalseC)
+ return SDValue();
+
+ const APFloat &TrueVal = TrueC->getValueAPF();
+ const APFloat &FalseVal = FalseC->getValueAPF();
+
+ if (TrueVal == APFloat::getOne(TrueVal.getSemantics()) && FalseVal.isZero()) {
+ return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Cond);
+ }
+
+ return SDValue();
+}
+
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
@@ -46336,6 +46358,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSelectOfTwoConstants(N, DAG, DL))
return V;
+ if (SDValue V = combineSelectOfTwoFPConstants(N, DAG, DL))
+ return V;
+
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
Cond.hasOneUse()) {
EVT CondVT = Cond.getValueType();
diff --git a/llvm/test/CodeGen/X86/fp-select-cmp-and.ll b/llvm/test/CodeGen/X86/fp-select-cmp-and.ll
index 0f6159d36ea818..64e6c410742754 100644
--- a/llvm/test/CodeGen/X86/fp-select-cmp-and.ll
+++ b/llvm/test/CodeGen/X86/fp-select-cmp-and.ll
@@ -213,3 +213,22 @@ define double @test18(double %a, double %b, double %c, double %eps) {
ret double %cond
}
+define float @test19(i1 %cmp) {
+; CHECK-LABEL: test19:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: cvtsi2ss %edi, %xmm0
+; CHECK-NEXT: retq
+ %cond = select i1 %cmp, float 1.000000e+00, float 0.000000e+00
+ ret float %cond
+}
+
+define double @test20(i1 %cmp) {
+; CHECK-LABEL: test20:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: cvtsi2sd %edi, %xmm0
+; CHECK-NEXT: retq
+ %cond = select i1 %cmp, double 1.000000e+00, double 0.000000e+00
+ ret double %cond
+}
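The first revision above implements the fold as a DAG combine (`combineSelectOfTwoFPConstants`), matching the constants with APFloat and emitting ISD::UINT_TO_FP. Below is a minimal standalone sketch of that constant check, using only the APFloat calls that appear in the hunk above; the helper name and the test driver are made up for illustration and need LLVM's ADT headers to build.

    // Sketch of the 1.0/0.0 constant test from the first revision.
    #include "llvm/ADT/APFloat.h"
    #include <cassert>

    using llvm::APFloat;

    // True when the pair of select operands is exactly (1.0, 0.0), for any
    // floating-point semantics (float, double, x86_fp80, IEEE quad, ...).
    // Note: isZero() also accepts -0.0, mirroring the check in the patch.
    static bool isOneZeroPair(const APFloat &TrueVal, const APFloat &FalseVal) {
      return TrueVal == APFloat::getOne(TrueVal.getSemantics()) && FalseVal.isZero();
    }

    int main() {
      assert(isOneZeroPair(APFloat(1.0f), APFloat(0.0f)));
      assert(!isOneZeroPair(APFloat(0.5f), APFloat(0.0f)));
      return 0;
    }

The second revision that follows relocates the check into `X86TargetLowering::LowerSELECT`, expressed with `ConstantFPSDNode::isExactlyValue` and `ISD::SINT_TO_FP`, and its test updates show the fold firing for x87 (`filds`) and fp128 (`__floatsitf`) selects as well.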
From 54f6ac405e89d4d0a6dc404b8c200b1fbc59c36f Mon Sep 17 00:00:00 2001
From: c8ef <c8ef at outlook.com>
Date: Sun, 8 Sep 2024 15:53:58 +0800
Subject: [PATCH 2/2] relocate
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 32 ++-----
.../apx/kmov-copy-to-from-asymmetric-reg.ll | 13 ++-
.../CodeGen/X86/apx/kmov-postrapseudos.ll | 12 +--
llvm/test/CodeGen/X86/cmovcmov.ll | 56 +++++------
llvm/test/CodeGen/X86/fp128-select.ll | 21 ++---
llvm/test/CodeGen/X86/logical-load-fold.ll | 30 +++---
.../X86/machine-trace-metrics-crash.ll | 17 ++--
llvm/test/CodeGen/X86/pr33349.ll | 80 ++++++++--------
llvm/test/CodeGen/X86/pr34177.ll | 94 +++++++++++--------
llvm/test/CodeGen/X86/sse1.ll | 82 ++++++++--------
10 files changed, 214 insertions(+), 223 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e40e28bacd4f66..856290988195a3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24150,6 +24150,13 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getBitcast(NVT, Op2)));
}
+ // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
+ const ConstantFPSDNode* FPTV = dyn_cast<ConstantFPSDNode>(Op1);
+ const ConstantFPSDNode* FPFV = dyn_cast<ConstantFPSDNode>(Op2);
+ if (FPTV && FPFV && FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0)) {
+ return DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), Cond);
+ }
+
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
// are available or VBLENDV if AVX is available.
// Otherwise FP cmovs get lowered into a less efficient branch sequence later.
@@ -45844,28 +45851,6 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineSelectOfTwoFPConstants(SDNode *N, SelectionDAG &DAG,
- const SDLoc &DL) {
- SDValue Cond = N->getOperand(0);
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
- EVT VT = N->getValueType(0);
-
- auto *TrueC = dyn_cast<ConstantFPSDNode>(LHS);
- auto *FalseC = dyn_cast<ConstantFPSDNode>(RHS);
- if (!TrueC || !FalseC)
- return SDValue();
-
- const APFloat &TrueVal = TrueC->getValueAPF();
- const APFloat &FalseVal = FalseC->getValueAPF();
-
- if (TrueVal == APFloat::getOne(TrueVal.getSemantics()) && FalseVal.isZero()) {
- return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Cond);
- }
-
- return SDValue();
-}
-
/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element.
@@ -46358,9 +46343,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSelectOfTwoConstants(N, DAG, DL))
return V;
- if (SDValue V = combineSelectOfTwoFPConstants(N, DAG, DL))
- return V;
-
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
Cond.hasOneUse()) {
EVT CondVT = Cond.getValueType();
diff --git a/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
index 747b288ec2f032..ff967f3440039e 100644
--- a/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
+++ b/llvm/test/CodeGen/X86/apx/kmov-copy-to-from-asymmetric-reg.ll
@@ -1,12 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
define void @kmov(i1 %cmp23.not) {
; CHECK-LABEL: kmov:
-; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+; CHECK-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
+; CHECK-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; CHECK-NEXT: retq # encoding: [0xc3]
;
; EGPR-LABEL: kmov:
-; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+; EGPR-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
+; EGPR-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
store double %0, ptr null, align 8
diff --git a/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
index b2cb2c3e04b3f4..1468a8ac259857 100644
--- a/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
+++ b/llvm/test/CodeGen/X86/apx/kmov-postrapseudos.ll
@@ -5,19 +5,15 @@
define void @kmovkr_1(i1 %cmp23.not) {
; AVX512-LABEL: kmovkr_1:
; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 {%k1} {z} = [1.0E+0,0.0E+0]
-; AVX512-NEXT: # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
-; AVX512-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512-NEXT: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; AVX512-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
; AVX512-NEXT: retq # encoding: [0xc3]
;
; AVX512BW-LABEL: kmovkr_1:
; AVX512BW: # %bb.0: # %entry
-; AVX512BW-NEXT: kmovd %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x92,0xcf]
-; AVX512BW-NEXT: vmovsd {{.*#+}} xmm0 {%k1} {z} = [1.0E+0,0.0E+0]
-; AVX512BW-NEXT: # encoding: [0x62,0xf1,0xff,0x89,0x10,0x05,A,A,A,A]
-; AVX512BW-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512BW-NEXT: andl $1, %edi # encoding: [0x83,0xe7,0x01]
+; AVX512BW-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; AVX512BW-NEXT: vmovsd %xmm0, 0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x25,0x00,0x00,0x00,0x00]
; AVX512BW-NEXT: retq # encoding: [0xc3]
entry:
diff --git a/llvm/test/CodeGen/X86/cmovcmov.ll b/llvm/test/CodeGen/X86/cmovcmov.ll
index d2d1c4db4608d9..f954dfcbf312ed 100644
--- a/llvm/test/CodeGen/X86/cmovcmov.ll
+++ b/llvm/test/CodeGen/X86/cmovcmov.ll
@@ -217,32 +217,28 @@ define dso_local float @test_zext_fcmp_une(float %a, float %b) nounwind {
; CMOV-LABEL: test_zext_fcmp_une:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: cmpneqss %xmm1, %xmm0
-; CMOV-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CMOV-NEXT: andps %xmm1, %xmm0
+; CMOV-NEXT: movd %xmm0, %eax
+; CMOV-NEXT: andl $1, %eax
+; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: cvtsi2ss %eax, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_zext_fcmp_une:
; NOCMOV: # %bb.0: # %entry
+; NOCMOV-NEXT: pushl %eax
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
-; NOCMOV-NEXT: fld1
-; NOCMOV-NEXT: fldz
-; NOCMOV-NEXT: jne .LBB5_1
-; NOCMOV-NEXT: # %bb.2: # %entry
-; NOCMOV-NEXT: jp .LBB5_5
-; NOCMOV-NEXT: # %bb.3: # %entry
-; NOCMOV-NEXT: fstp %st(1)
-; NOCMOV-NEXT: jmp .LBB5_4
-; NOCMOV-NEXT: .LBB5_1:
-; NOCMOV-NEXT: fstp %st(0)
-; NOCMOV-NEXT: .LBB5_4: # %entry
-; NOCMOV-NEXT: fldz
-; NOCMOV-NEXT: .LBB5_5: # %entry
-; NOCMOV-NEXT: fstp %st(0)
+; NOCMOV-NEXT: setp %al
+; NOCMOV-NEXT: setne %cl
+; NOCMOV-NEXT: orb %al, %cl
+; NOCMOV-NEXT: movzbl %cl, %eax
+; NOCMOV-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; NOCMOV-NEXT: filds {{[0-9]+}}(%esp)
+; NOCMOV-NEXT: popl %eax
; NOCMOV-NEXT: retl
entry:
%cmp = fcmp une float %a, %b
@@ -255,32 +251,28 @@ define dso_local float @test_zext_fcmp_oeq(float %a, float %b) nounwind {
; CMOV-LABEL: test_zext_fcmp_oeq:
; CMOV: # %bb.0: # %entry
; CMOV-NEXT: cmpeqss %xmm1, %xmm0
-; CMOV-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CMOV-NEXT: andps %xmm1, %xmm0
+; CMOV-NEXT: movd %xmm0, %eax
+; CMOV-NEXT: andl $1, %eax
+; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: cvtsi2ss %eax, %xmm0
; CMOV-NEXT: retq
;
; NOCMOV-LABEL: test_zext_fcmp_oeq:
; NOCMOV: # %bb.0: # %entry
+; NOCMOV-NEXT: pushl %eax
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; NOCMOV-NEXT: fucompp
; NOCMOV-NEXT: fnstsw %ax
; NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
; NOCMOV-NEXT: sahf
-; NOCMOV-NEXT: fldz
-; NOCMOV-NEXT: fld1
-; NOCMOV-NEXT: jne .LBB6_1
-; NOCMOV-NEXT: # %bb.2: # %entry
-; NOCMOV-NEXT: jp .LBB6_5
-; NOCMOV-NEXT: # %bb.3: # %entry
-; NOCMOV-NEXT: fstp %st(1)
-; NOCMOV-NEXT: jmp .LBB6_4
-; NOCMOV-NEXT: .LBB6_1:
-; NOCMOV-NEXT: fstp %st(0)
-; NOCMOV-NEXT: .LBB6_4: # %entry
-; NOCMOV-NEXT: fldz
-; NOCMOV-NEXT: .LBB6_5: # %entry
-; NOCMOV-NEXT: fstp %st(0)
+; NOCMOV-NEXT: setnp %al
+; NOCMOV-NEXT: sete %cl
+; NOCMOV-NEXT: andb %al, %cl
+; NOCMOV-NEXT: movzbl %cl, %eax
+; NOCMOV-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; NOCMOV-NEXT: filds {{[0-9]+}}(%esp)
+; NOCMOV-NEXT: popl %eax
; NOCMOV-NEXT: retl
entry:
%cmp = fcmp oeq float %a, %b
diff --git a/llvm/test/CodeGen/X86/fp128-select.ll b/llvm/test/CodeGen/X86/fp128-select.ll
index 0486c1c4d28e95..34318008fb60a4 100644
--- a/llvm/test/CodeGen/X86/fp128-select.ll
+++ b/llvm/test/CodeGen/X86/fp128-select.ll
@@ -14,7 +14,7 @@ define void @test_select(ptr %p, ptr %q, i1 zeroext %c) {
; SSE-NEXT: testl %edx, %edx
; SSE-NEXT: jne .LBB0_1
; SSE-NEXT: # %bb.3:
-; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [NaN]
; SSE-NEXT: movaps %xmm0, (%rsi)
; SSE-NEXT: retq
; SSE-NEXT: .LBB0_1:
@@ -55,20 +55,17 @@ define fp128 @test_select_cc(fp128, fp128) {
; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-NEXT: callq __eqtf2 at PLT
+; SSE-NEXT: xorl %edi, %edi
; SSE-NEXT: testl %eax, %eax
-; SSE-NEXT: je .LBB1_1
-; SSE-NEXT: # %bb.2: # %BB0
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: jmp .LBB1_3
-; SSE-NEXT: .LBB1_1:
-; SSE-NEXT: movaps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: .LBB1_3: # %BB0
+; SSE-NEXT: sete %dil
+; SSE-NEXT: callq __floatsitf at PLT
+; SSE-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT: testl %ebx, %ebx
-; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; SSE-NEXT: jne .LBB1_5
-; SSE-NEXT: # %bb.4: # %BB1
+; SSE-NEXT: jne .LBB1_2
+; SSE-NEXT: # %bb.1: # %BB1
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: .LBB1_2: # %BB2
; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: .LBB1_5: # %BB2
; SSE-NEXT: addq $32, %rsp
; SSE-NEXT: .cfi_def_cfa_offset 16
; SSE-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/logical-load-fold.ll b/llvm/test/CodeGen/X86/logical-load-fold.ll
index 1c3f209fc1e66d..9dcc17d0283f91 100644
--- a/llvm/test/CodeGen/X86/logical-load-fold.ll
+++ b/llvm/test/CodeGen/X86/logical-load-fold.ll
@@ -13,16 +13,19 @@
define double @load_double_no_fold(double %x, double %y) {
; SSE2-LABEL: load_double_no_fold:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmplesd %xmm0, %xmm1
-; SSE2-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
-; SSE2-NEXT: andpd %xmm1, %xmm0
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: ucomisd %xmm1, %xmm0
+; SSE2-NEXT: setae %al
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2sd %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: load_double_no_fold:
; AVX: # %bb.0:
-; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
-; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: setae %al
+; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm0
; AVX-NEXT: retq
%cmp = fcmp oge double %x, %y
@@ -34,16 +37,19 @@ define double @load_double_no_fold(double %x, double %y) {
define float @load_float_no_fold(float %x, float %y) {
; SSE2-LABEL: load_float_no_fold:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpless %xmm0, %xmm1
-; SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; SSE2-NEXT: andps %xmm1, %xmm0
+; SSE2-NEXT: xorl %eax, %eax
+; SSE2-NEXT: ucomiss %xmm1, %xmm0
+; SSE2-NEXT: setae %al
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %eax, %xmm0
; SSE2-NEXT: retq
;
; AVX-LABEL: load_float_no_fold:
; AVX: # %bb.0:
-; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: setae %al
+; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
; AVX-NEXT: retq
%cmp = fcmp oge float %x, %y
diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
index 5828f06bf1c39b..01f5cd268d554f 100644
--- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
+++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
@@ -14,7 +14,6 @@ define void @PR24199(i32 %a0) {
; CHECK-NEXT: subq $16, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset %rbx, -16
-; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB0_2
@@ -25,20 +24,18 @@ define void @PR24199(i32 %a0) {
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: .LBB0_3: # %if.end
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: setne %bl
; CHECK-NEXT: callq foo at PLT
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: mulss %xmm0, %xmm2
-; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-NEXT: addss %xmm1, %xmm0
-; CHECK-NEXT: addss %xmm2, %xmm0
-; CHECK-NEXT: movss %xmm0, (%rax)
-; CHECK-NEXT: testl %ebx, %ebx
-; CHECK-NEXT: jne .LBB0_5
-; CHECK-NEXT: # %bb.4: # %if.end
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: .LBB0_5: # %if.end
+; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: addss %xmm0, %xmm2
+; CHECK-NEXT: cvtsi2ss %ebx, %xmm1
+; CHECK-NEXT: movss %xmm2, (%rax)
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: addss %xmm0, %xmm0
; CHECK-NEXT: addss %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll
index 83d3a33572266f..e31dada58a8a50 100644
--- a/llvm/test/CodeGen/X86/pr33349.ll
+++ b/llvm/test/CodeGen/X86/pr33349.ll
@@ -11,28 +11,30 @@ target triple = "x86_64-unknown-linux-gnu"
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: testb $1, %al
-; KNL-NEXT: fld1
-; KNL-NEXT: fldz
-; KNL-NEXT: fld %st(0)
-; KNL-NEXT: fcmovne %st(2), %st
-; KNL-NEXT: testb $2, %al
-; KNL-NEXT: fld %st(1)
-; KNL-NEXT: fcmovne %st(3), %st
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: kshiftrw $1, %k0, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: testb $1, %al
-; KNL-NEXT: fld %st(2)
-; KNL-NEXT: fcmovne %st(4), %st
-; KNL-NEXT: testb $2, %al
-; KNL-NEXT: fxch %st(3)
-; KNL-NEXT: fcmovne %st(4), %st
-; KNL-NEXT: fstp %st(4)
-; KNL-NEXT: fxch %st(3)
-; KNL-NEXT: fstpt 10(%rdi)
-; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: filds -{{[0-9]+}}(%rsp)
+; KNL-NEXT: filds -{{[0-9]+}}(%rsp)
+; KNL-NEXT: filds -{{[0-9]+}}(%rsp)
+; KNL-NEXT: filds -{{[0-9]+}}(%rsp)
; KNL-NEXT: fstpt (%rdi)
-; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: fstpt 10(%rdi)
; KNL-NEXT: fstpt 30(%rdi)
; KNL-NEXT: fstpt 20(%rdi)
; KNL-NEXT: vzeroupper
@@ -43,28 +45,30 @@ target triple = "x86_64-unknown-linux-gnu"
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kshiftrb $2, %k0, %k1
+; SKX-NEXT: kshiftrb $1, %k1, %k2
; SKX-NEXT: kmovd %k1, %eax
-; SKX-NEXT: testb $1, %al
-; SKX-NEXT: fld1
-; SKX-NEXT: fldz
-; SKX-NEXT: fld %st(0)
-; SKX-NEXT: fcmovne %st(2), %st
-; SKX-NEXT: testb $2, %al
-; SKX-NEXT: fld %st(1)
-; SKX-NEXT: fcmovne %st(3), %st
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: movzbl %al, %eax
+; SKX-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: kmovd %k2, %eax
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: movzbl %al, %eax
+; SKX-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: kshiftrb $1, %k0, %k1
+; SKX-NEXT: kmovd %k1, %eax
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: movzbl %al, %eax
+; SKX-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: testb $1, %al
-; SKX-NEXT: fld %st(2)
-; SKX-NEXT: fcmovne %st(4), %st
-; SKX-NEXT: testb $2, %al
-; SKX-NEXT: fxch %st(3)
-; SKX-NEXT: fcmovne %st(4), %st
-; SKX-NEXT: fstp %st(4)
-; SKX-NEXT: fxch %st(3)
-; SKX-NEXT: fstpt 10(%rdi)
-; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: movzbl %al, %eax
+; SKX-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: filds -{{[0-9]+}}(%rsp)
+; SKX-NEXT: filds -{{[0-9]+}}(%rsp)
+; SKX-NEXT: filds -{{[0-9]+}}(%rsp)
+; SKX-NEXT: filds -{{[0-9]+}}(%rsp)
; SKX-NEXT: fstpt (%rdi)
-; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt 10(%rdi)
; SKX-NEXT: fstpt 30(%rdi)
; SKX-NEXT: fstpt 20(%rdi)
; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll
index 29922c2ac1a716..fefc92be73518b 100644
--- a/llvm/test/CodeGen/X86/pr34177.ll
+++ b/llvm/test/CodeGen/X86/pr34177.ll
@@ -8,27 +8,33 @@ target triple = "x86_64-unknown-linux-gnu"
define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
; AVX512F-LABEL: test:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vmovq %xmm0, %rcx
+; AVX512F-NEXT: xorl %eax, %eax
+; AVX512F-NEXT: testq %rcx, %rcx
+; AVX512F-NEXT: sete %al
; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vmovq %xmm0, %rdx
-; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX512F-NEXT: cmpq $3, %rsi
-; AVX512F-NEXT: fld1
-; AVX512F-NEXT: fldz
-; AVX512F-NEXT: fld %st(0)
-; AVX512F-NEXT: fcmove %st(2), %st
-; AVX512F-NEXT: cmpq $2, %rdx
-; AVX512F-NEXT: fld %st(1)
-; AVX512F-NEXT: fcmove %st(3), %st
+; AVX512F-NEXT: xorl %edx, %edx
; AVX512F-NEXT: cmpq $1, %rcx
-; AVX512F-NEXT: fld %st(2)
-; AVX512F-NEXT: fcmove %st(4), %st
-; AVX512F-NEXT: testq %rax, %rax
-; AVX512F-NEXT: fxch %st(3)
-; AVX512F-NEXT: fcmove %st(4), %st
-; AVX512F-NEXT: fstp %st(4)
-; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: sete %dl
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rcx
+; AVX512F-NEXT: xorl %esi, %esi
+; AVX512F-NEXT: cmpq $2, %rcx
+; AVX512F-NEXT: sete %sil
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT: xorl %r8d, %r8d
+; AVX512F-NEXT: cmpq $3, %rcx
+; AVX512F-NEXT: sete %r8b
+; AVX512F-NEXT: movw %r8w, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movw %si, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fxch %st(4)
; AVX512F-NEXT: fstpt 70(%rdi)
; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX512F-NEXT: fstpt 50(%rdi)
@@ -36,7 +42,7 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
; AVX512F-NEXT: fstpt 30(%rdi)
; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX512F-NEXT: fstpt 10(%rdi)
-; AVX512F-NEXT: fxch %st(1)
+; AVX512F-NEXT: fxch %st(2)
; AVX512F-NEXT: fadd %st, %st(0)
; AVX512F-NEXT: fstpt 60(%rdi)
; AVX512F-NEXT: fadd %st, %st(0)
@@ -48,26 +54,32 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
;
; AVX512VL-LABEL: test:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
-; AVX512VL-NEXT: kshiftrb $2, %k0, %k1
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: testb $2, %al
-; AVX512VL-NEXT: fld1
-; AVX512VL-NEXT: fldz
-; AVX512VL-NEXT: fld %st(0)
-; AVX512VL-NEXT: fcmovne %st(2), %st
-; AVX512VL-NEXT: testb $1, %al
-; AVX512VL-NEXT: fld %st(1)
-; AVX512VL-NEXT: fcmovne %st(3), %st
+; AVX512VL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
+; AVX512VL-NEXT: kshiftrb $1, %k1, %k2
+; AVX512VL-NEXT: kshiftrb $2, %k1, %k0
; AVX512VL-NEXT: kmovd %k1, %eax
-; AVX512VL-NEXT: testb $2, %al
-; AVX512VL-NEXT: fld %st(2)
-; AVX512VL-NEXT: fcmovne %st(4), %st
-; AVX512VL-NEXT: testb $1, %al
-; AVX512VL-NEXT: fxch %st(3)
-; AVX512VL-NEXT: fcmovne %st(4), %st
-; AVX512VL-NEXT: fstp %st(4)
; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: andb $1, %al
+; AVX512VL-NEXT: movzbl %al, %eax
+; AVX512VL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: kmovd %k2, %eax
+; AVX512VL-NEXT: andb $1, %al
+; AVX512VL-NEXT: movzbl %al, %eax
+; AVX512VL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: kshiftrb $1, %k0, %k1
+; AVX512VL-NEXT: kmovd %k1, %eax
+; AVX512VL-NEXT: andb $1, %al
+; AVX512VL-NEXT: movzbl %al, %eax
+; AVX512VL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: andb $1, %al
+; AVX512VL-NEXT: movzbl %al, %eax
+; AVX512VL-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: filds -{{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fxch %st(4)
; AVX512VL-NEXT: fstpt 70(%rdi)
; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX512VL-NEXT: fstpt 50(%rdi)
@@ -75,12 +87,12 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
; AVX512VL-NEXT: fstpt 30(%rdi)
; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
; AVX512VL-NEXT: fstpt 10(%rdi)
-; AVX512VL-NEXT: fxch %st(1)
-; AVX512VL-NEXT: fadd %st, %st(0)
-; AVX512VL-NEXT: fstpt 20(%rdi)
+; AVX512VL-NEXT: fxch %st(2)
; AVX512VL-NEXT: fadd %st, %st(0)
; AVX512VL-NEXT: fstpt (%rdi)
; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 20(%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
; AVX512VL-NEXT: fstpt 60(%rdi)
; AVX512VL-NEXT: fadd %st, %st(0)
; AVX512VL-NEXT: fstpt 40(%rdi)
diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll
index 8ac86d11d89e6c..c721e6a2e6eb4b 100644
--- a/llvm/test/CodeGen/X86/sse1.ll
+++ b/llvm/test/CodeGen/X86/sse1.ll
@@ -44,78 +44,74 @@ entry:
define <4 x float> @vselect(ptr%p, <4 x i32> %q) {
; X86-LABEL: vselect:
; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: xorps %xmm0, %xmm0
+; X86-NEXT: sete %cl
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: xorps %xmm1, %xmm1
; X86-NEXT: je .LBB1_1
; X86-NEXT: # %bb.2: # %entry
-; X86-NEXT: xorps %xmm1, %xmm1
+; X86-NEXT: xorps %xmm2, %xmm2
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: jne .LBB1_5
; X86-NEXT: .LBB1_4:
-; X86-NEXT: movss {{.*#+}} xmm2 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: jne .LBB1_8
-; X86-NEXT: .LBB1_7:
-; X86-NEXT: movss {{.*#+}} xmm3 = [4.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; X86-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-NEXT: movss {{.*#+}} xmm0 = [4.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB1_10
-; X86-NEXT: jmp .LBB1_11
+; X86-NEXT: je .LBB1_7
+; X86-NEXT: jmp .LBB1_8
; X86-NEXT: .LBB1_1:
-; X86-NEXT: movss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X86-NEXT: movss {{.*#+}} xmm2 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT: je .LBB1_4
; X86-NEXT: .LBB1_5: # %entry
-; X86-NEXT: xorps %xmm2, %xmm2
+; X86-NEXT: xorps %xmm0, %xmm0
+; X86-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: je .LBB1_7
+; X86-NEXT: jne .LBB1_8
+; X86-NEXT: .LBB1_7:
+; X86-NEXT: movss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-NEXT: .LBB1_8: # %entry
-; X86-NEXT: xorps %xmm3, %xmm3
-; X86-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: jne .LBB1_11
-; X86-NEXT: .LBB1_10:
-; X86-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; X86-NEXT: .LBB1_11: # %entry
+; X86-NEXT: movb %cl, %al
+; X86-NEXT: xorps %xmm0, %xmm0
+; X86-NEXT: cvtsi2ss %eax, %xmm0
; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X86-NEXT: retl
;
; X64-LABEL: vselect:
; X64: # %bb.0: # %entry
-; X64-NEXT: testl %edx, %edx
-; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl %esi, %esi
+; X64-NEXT: sete %sil
+; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: je .LBB1_1
; X64-NEXT: # %bb.2: # %entry
-; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: testl %r8d, %r8d
; X64-NEXT: jne .LBB1_5
; X64-NEXT: .LBB1_4:
+; X64-NEXT: movss {{.*#+}} xmm0 = [4.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: je .LBB1_7
+; X64-NEXT: jmp .LBB1_8
+; X64-NEXT: .LBB1_1:
; X64-NEXT: movss {{.*#+}} xmm2 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-NEXT: testl %r8d, %r8d
+; X64-NEXT: je .LBB1_4
+; X64-NEXT: .LBB1_5: # %entry
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-NEXT: testl %edx, %edx
; X64-NEXT: jne .LBB1_8
; X64-NEXT: .LBB1_7:
-; X64-NEXT: movss {{.*#+}} xmm3 = [4.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB1_10
-; X64-NEXT: jmp .LBB1_11
-; X64-NEXT: .LBB1_1:
; X64-NEXT: movss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; X64-NEXT: testl %ecx, %ecx
-; X64-NEXT: je .LBB1_4
-; X64-NEXT: .LBB1_5: # %entry
-; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: testl %r8d, %r8d
-; X64-NEXT: je .LBB1_7
; X64-NEXT: .LBB1_8: # %entry
-; X64-NEXT: xorps %xmm3, %xmm3
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: jne .LBB1_11
-; X64-NEXT: .LBB1_10:
-; X64-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; X64-NEXT: .LBB1_11: # %entry
+; X64-NEXT: movb %sil, %al
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: cvtsi2ss %eax, %xmm0
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
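The pr33349.ll, pr34177.ll, and fp128-select.ll updates above show the relocated fold also applying to x86_fp80 and fp128 selects, where the converted condition is loaded with `filds` or passed to `__floatsitf` instead of being materialized with `fld1`/`fldz` plus `fcmove`. A small C++ reproducer for those wider types is sketched below; it is illustrative only, and the exact assembly depends on the compiler version and flags.

    // Compile with `clang -O2 -S` for x86-64 and inspect the output: with the
    // fold in place, the selects below become an int-to-FP conversion of the
    // condition (filds for x86_fp80, a __floatsitf libcall for fp128) rather
    // than fcmove or constant-pool blend sequences.
    long double select_fp80(bool c) {   // x86_fp80 on x86-64
      return c ? 1.0L : 0.0L;
    }

    __float128 select_fp128(bool c) {   // IEEE quad via the __float128 extension
      return c ? static_cast<__float128>(1.0) : static_cast<__float128>(0.0);
    }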