[llvm] 22987ba - [X86] Teach combineCVTP2I_CVTTP2I to handle STRICT_CVTTP2SI/STRICT_CVTTP2UI
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 7 19:49:01 PDT 2020
Author: Craig Topper
Date: 2020-06-07T19:31:06-07:00
New Revision: 22987babd5c4a0eff59a8ac6c479665b37dadea3
URL: https://github.com/llvm/llvm-project/commit/22987babd5c4a0eff59a8ac6c479665b37dadea3
DIFF: https://github.com/llvm/llvm-project/commit/22987babd5c4a0eff59a8ac6c479665b37dadea3.diff
LOG: [X86] Teach combineCVTP2I_CVTTP2I to handle STRICT_CVTTP2SI/STRICT_CVTTP2UI
This allows us to shrink a simple 128-bit load to enable load folding for
v2f32->v2i64 vcvttps2qq/vcvttps2uqq.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7db874e0e8e0..dfbf459927a4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44765,11 +44765,11 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- // FIXME: Handle strict fp nodes.
+ bool IsStrict = N->isTargetStrictFPOpcode();
EVT VT = N->getValueType(0);
// Convert a full vector load into vzload when not all bits are needed.
- SDValue In = N->getOperand(0);
+ SDValue In = N->getOperand(IsStrict ? 1 : 0);
MVT InVT = In.getSimpleValueType();
if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
@@ -44780,9 +44780,16 @@ static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
SDLoc dl(N);
- SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
- DAG.getBitcast(InVT, VZLoad));
- DCI.CombineTo(N, Convert);
+ if (IsStrict) {
+ SDValue Convert =
+ DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
+ {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
+ DCI.CombineTo(N, Convert, Convert.getValue(1));
+ } else {
+ SDValue Convert =
+ DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
+ DCI.CombineTo(N, Convert);
+ }
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
@@ -47991,8 +47998,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
case X86ISD::CVTP2SI:
case X86ISD::CVTP2UI:
+ case X86ISD::STRICT_CVTTP2SI:
case X86ISD::CVTTP2SI:
- case X86ISD::CVTTP2UI: return combineCVTP2I_CVTTP2I(N, DAG, DCI);
+ case X86ISD::STRICT_CVTTP2UI:
+ case X86ISD::CVTTP2UI:
+ return combineCVTP2I_CVTTP2I(N, DAG, DCI);
case X86ISD::STRICT_CVTPH2PS:
case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI);
case X86ISD::BT: return combineBT(N, DAG, DCI);
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index 0c706b36e432..a5a3460056e8 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=CHECK,SSE-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
+; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-32
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VLDQ,AVX512VLDQ-64
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
@@ -703,6 +703,201 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
ret <2 x i64> %ret
}
+define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp {
+; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; SSE-32: # %bb.0:
+; SSE-32-NEXT: pushl %ebp
+; SSE-32-NEXT: .cfi_def_cfa_offset 8
+; SSE-32-NEXT: .cfi_offset %ebp, -8
+; SSE-32-NEXT: movl %esp, %ebp
+; SSE-32-NEXT: .cfi_def_cfa_register %ebp
+; SSE-32-NEXT: andl $-8, %esp
+; SSE-32-NEXT: subl $24, %esp
+; SSE-32-NEXT: movl 8(%ebp), %eax
+; SSE-32-NEXT: movaps (%eax), %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw (%esp)
+; SSE-32-NEXT: movzwl (%esp), %eax
+; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
+; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw (%esp)
+; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-32-NEXT: movl %ebp, %esp
+; SSE-32-NEXT: popl %ebp
+; SSE-32-NEXT: .cfi_def_cfa %esp, 4
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; SSE-64: # %bb.0:
+; SSE-64-NEXT: movaps (%rdi), %xmm1
+; SSE-64-NEXT: cvttss2si %xmm1, %rax
+; SSE-64-NEXT: movq %rax, %xmm0
+; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-64-NEXT: cvttss2si %xmm1, %rax
+; SSE-64-NEXT: movq %rax, %xmm1
+; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: .cfi_def_cfa_offset 8
+; AVX-32-NEXT: .cfi_offset %ebp, -8
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX-32-NEXT: andl $-8, %esp
+; AVX-32-NEXT: subl $16, %esp
+; AVX-32-NEXT: movl 8(%ebp), %eax
+; AVX-32-NEXT: vmovaps (%eax), %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vcvttss2si 4(%rdi), %rax
+; AVX-64-NEXT: vmovq %rax, %xmm0
+; AVX-64-NEXT: vcvttss2si (%rdi), %rax
+; AVX-64-NEXT: vmovq %rax, %xmm1
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-64-NEXT: retq
+;
+; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: movl 8(%ebp), %eax
+; AVX512F-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512F-32-NEXT: vmovd %xmm0, (%esp)
+; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttss2si 4(%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vcvttss2si (%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
+; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VL-32: # %bb.0:
+; AVX512VL-32-NEXT: pushl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
+; AVX512VL-32-NEXT: movl %esp, %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512VL-32-NEXT: andl $-8, %esp
+; AVX512VL-32-NEXT: subl $16, %esp
+; AVX512VL-32-NEXT: movl 8(%ebp), %eax
+; AVX512VL-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512VL-32-NEXT: vmovd %xmm0, (%esp)
+; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: flds (%esp)
+; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
+; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: movl %ebp, %esp
+; AVX512VL-32-NEXT: popl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512VL-32-NEXT: retl
+;
+; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VL-64: # %bb.0:
+; AVX512VL-64-NEXT: vcvttss2si 4(%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm0
+; AVX512VL-64-NEXT: vcvttss2si (%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm1
+; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-64-NEXT: retq
+;
+; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512DQ-32: # %bb.0:
+; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-32-NEXT: vzeroupper
+; AVX512DQ-32-NEXT: retl
+;
+; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512DQ-64: # %bb.0:
+; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0
+; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-64-NEXT: vzeroupper
+; AVX512DQ-64-NEXT: retq
+;
+; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VLDQ-32: # %bb.0:
+; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512VLDQ-32-NEXT: vcvttps2qq (%eax), %xmm0
+; AVX512VLDQ-32-NEXT: retl
+;
+; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
+; AVX512VLDQ-64: # %bb.0:
+; AVX512VLDQ-64-NEXT: vcvttps2qq (%rdi), %xmm0
+; AVX512VLDQ-64-NEXT: retq
+ %a = load <4 x float>, <4 x float>* %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
+ ret <2 x i64> %c
+}
+
define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; SSE-32: # %bb.0:
@@ -717,10 +912,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: comiss %xmm2, %xmm0
; SSE-32-NEXT: xorps %xmm1, %xmm1
; SSE-32-NEXT: xorps %xmm3, %xmm3
-; SSE-32-NEXT: jb .LBB3_2
+; SSE-32-NEXT: jb .LBB4_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: .LBB3_2:
+; SSE-32-NEXT: .LBB4_2:
; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
@@ -736,10 +931,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: comiss %xmm2, %xmm0
-; SSE-32-NEXT: jb .LBB3_4
+; SSE-32-NEXT: jb .LBB4_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movaps %xmm2, %xmm1
-; SSE-32-NEXT: .LBB3_4:
+; SSE-32-NEXT: .LBB4_4:
; SSE-32-NEXT: subss %xmm1, %xmm0
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
@@ -776,10 +971,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-64-NEXT: comiss %xmm3, %xmm0
; SSE-64-NEXT: xorps %xmm2, %xmm2
; SSE-64-NEXT: xorps %xmm1, %xmm1
-; SSE-64-NEXT: jb .LBB3_2
+; SSE-64-NEXT: jb .LBB4_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: movaps %xmm3, %xmm1
-; SSE-64-NEXT: .LBB3_2:
+; SSE-64-NEXT: .LBB4_2:
; SSE-64-NEXT: movaps %xmm0, %xmm4
; SSE-64-NEXT: subss %xmm1, %xmm4
; SSE-64-NEXT: cvttss2si %xmm4, %rax
@@ -790,10 +985,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-64-NEXT: comiss %xmm3, %xmm0
-; SSE-64-NEXT: jb .LBB3_4
+; SSE-64-NEXT: jb .LBB4_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: movaps %xmm3, %xmm2
-; SSE-64-NEXT: .LBB3_4:
+; SSE-64-NEXT: .LBB4_4:
; SSE-64-NEXT: subss %xmm2, %xmm0
; SSE-64-NEXT: cvttss2si %xmm0, %rax
; SSE-64-NEXT: setae %cl
@@ -819,10 +1014,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX-32-NEXT: jb .LBB3_2
+; AVX-32-NEXT: jb .LBB4_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
-; AVX-32-NEXT: .LBB3_2:
+; AVX-32-NEXT: .LBB4_2:
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
@@ -833,10 +1028,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
-; AVX-32-NEXT: jb .LBB3_4
+; AVX-32-NEXT: jb .LBB4_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
-; AVX-32-NEXT: .LBB3_4:
+; AVX-32-NEXT: .LBB4_4:
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-32-NEXT: vmovss %xmm0, (%esp)
; AVX-32-NEXT: flds (%esp)
@@ -861,10 +1056,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-64-NEXT: jb .LBB3_2
+; AVX-64-NEXT: jb .LBB4_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovaps %xmm1, %xmm3
-; AVX-64-NEXT: .LBB3_2:
+; AVX-64-NEXT: .LBB4_2:
; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
; AVX-64-NEXT: setae %cl
@@ -874,10 +1069,10 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
-; AVX-64-NEXT: jb .LBB3_4
+; AVX-64-NEXT: jb .LBB4_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
-; AVX-64-NEXT: .LBB3_4:
+; AVX-64-NEXT: .LBB4_4:
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
@@ -1022,6 +1217,349 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
ret <2 x i64> %ret
}
+define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) strictfp {
+; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; SSE-32: # %bb.0:
+; SSE-32-NEXT: pushl %ebp
+; SSE-32-NEXT: .cfi_def_cfa_offset 8
+; SSE-32-NEXT: .cfi_offset %ebp, -8
+; SSE-32-NEXT: movl %esp, %ebp
+; SSE-32-NEXT: .cfi_def_cfa_register %ebp
+; SSE-32-NEXT: andl $-8, %esp
+; SSE-32-NEXT: subl $24, %esp
+; SSE-32-NEXT: movl 8(%ebp), %eax
+; SSE-32-NEXT: movaps (%eax), %xmm0
+; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: xorps %xmm3, %xmm3
+; SSE-32-NEXT: jb .LBB5_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movaps %xmm2, %xmm3
+; SSE-32-NEXT: .LBB5_2:
+; SSE-32-NEXT: movaps %xmm0, %xmm4
+; SSE-32-NEXT: subss %xmm3, %xmm4
+; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: jb .LBB5_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm2, %xmm1
+; SSE-32-NEXT: .LBB5_4:
+; SSE-32-NEXT: subss %xmm1, %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %cl
+; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
+; SSE-32-NEXT: wait
+; SSE-32-NEXT: fnstcw (%esp)
+; SSE-32-NEXT: movzwl (%esp), %edx
+; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
+; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
+; SSE-32-NEXT: fldcw (%esp)
+; SSE-32-NEXT: movzbl %al, %eax
+; SSE-32-NEXT: shll $31, %eax
+; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: movd %eax, %xmm1
+; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-32-NEXT: movzbl %cl, %eax
+; SSE-32-NEXT: shll $31, %eax
+; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; SSE-32-NEXT: movd %eax, %xmm1
+; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-32-NEXT: movl %ebp, %esp
+; SSE-32-NEXT: popl %ebp
+; SSE-32-NEXT: .cfi_def_cfa %esp, 4
+; SSE-32-NEXT: retl
+;
+; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; SSE-64: # %bb.0:
+; SSE-64-NEXT: movaps (%rdi), %xmm1
+; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-64-NEXT: comiss %xmm3, %xmm1
+; SSE-64-NEXT: xorps %xmm2, %xmm2
+; SSE-64-NEXT: xorps %xmm0, %xmm0
+; SSE-64-NEXT: jb .LBB5_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movaps %xmm3, %xmm0
+; SSE-64-NEXT: .LBB5_2:
+; SSE-64-NEXT: movaps %xmm1, %xmm4
+; SSE-64-NEXT: subss %xmm0, %xmm4
+; SSE-64-NEXT: cvttss2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
+; SSE-64-NEXT: xorq %rax, %rcx
+; SSE-64-NEXT: movq %rcx, %xmm0
+; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-64-NEXT: comiss %xmm3, %xmm1
+; SSE-64-NEXT: jb .LBB5_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movaps %xmm3, %xmm2
+; SSE-64-NEXT: .LBB5_4:
+; SSE-64-NEXT: subss %xmm2, %xmm1
+; SSE-64-NEXT: cvttss2si %xmm1, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
+; SSE-64-NEXT: xorq %rax, %rcx
+; SSE-64-NEXT: movq %rcx, %xmm1
+; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-64-NEXT: retq
+;
+; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: pushl %ebp
+; AVX-32-NEXT: .cfi_def_cfa_offset 8
+; AVX-32-NEXT: .cfi_offset %ebp, -8
+; AVX-32-NEXT: movl %esp, %ebp
+; AVX-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX-32-NEXT: andl $-8, %esp
+; AVX-32-NEXT: subl $16, %esp
+; AVX-32-NEXT: movl 8(%ebp), %eax
+; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
+; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-32-NEXT: jb .LBB5_2
+; AVX-32-NEXT: # %bb.1:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX-32-NEXT: .LBB5_2:
+; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
+; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX-32-NEXT: wait
+; AVX-32-NEXT: setae %al
+; AVX-32-NEXT: movzbl %al, %eax
+; AVX-32-NEXT: shll $31, %eax
+; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX-32-NEXT: jb .LBB5_4
+; AVX-32-NEXT: # %bb.3:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm2
+; AVX-32-NEXT: .LBB5_4:
+; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
+; AVX-32-NEXT: wait
+; AVX-32-NEXT: setae %cl
+; AVX-32-NEXT: movzbl %cl, %ecx
+; AVX-32-NEXT: shll $31, %ecx
+; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-32-NEXT: movl %ebp, %esp
+; AVX-32-NEXT: popl %ebp
+; AVX-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-64-NEXT: vcomiss %xmm1, %xmm3
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-64-NEXT: jb .LBB5_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm4
+; AVX-64-NEXT: .LBB5_2:
+; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX-64-NEXT: vcvttss2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
+; AVX-64-NEXT: xorq %rax, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm3
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB5_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB5_4:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
+; AVX-64-NEXT: xorq %rax, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm0
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX-64-NEXT: retq
+;
+; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
+; AVX512F-32-NEXT: movl %esp, %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512F-32-NEXT: andl $-8, %esp
+; AVX512F-32-NEXT: subl $16, %esp
+; AVX512F-32-NEXT: movl 8(%ebp), %eax
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: xorl %eax, %eax
+; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512F-32-NEXT: setb %cl
+; AVX512F-32-NEXT: kmovw %ecx, %k1
+; AVX512F-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512F-32-NEXT: vmovaps %xmm2, %xmm4
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: wait
+; AVX512F-32-NEXT: setae %al
+; AVX512F-32-NEXT: shll $31, %eax
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: xorl %ecx, %ecx
+; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512F-32-NEXT: setb %dl
+; AVX512F-32-NEXT: kmovw %edx, %k1
+; AVX512F-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512F-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512F-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512F-32-NEXT: flds (%esp)
+; AVX512F-32-NEXT: fisttpll (%esp)
+; AVX512F-32-NEXT: wait
+; AVX512F-32-NEXT: setae %cl
+; AVX512F-32-NEXT: shll $31, %ecx
+; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512F-32-NEXT: movl %ebp, %esp
+; AVX512F-32-NEXT: popl %ebp
+; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512F-32-NEXT: retl
+;
+; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512F-64: # %bb.0:
+; AVX512F-64-NEXT: vcvttss2usi 4(%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm0
+; AVX512F-64-NEXT: vcvttss2usi (%rdi), %rax
+; AVX512F-64-NEXT: vmovq %rax, %xmm1
+; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512F-64-NEXT: retq
+;
+; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VL-32: # %bb.0:
+; AVX512VL-32-NEXT: pushl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512VL-32-NEXT: .cfi_offset %ebp, -8
+; AVX512VL-32-NEXT: movl %esp, %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp
+; AVX512VL-32-NEXT: andl $-8, %esp
+; AVX512VL-32-NEXT: subl $16, %esp
+; AVX512VL-32-NEXT: movl 8(%ebp), %eax
+; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: xorl %eax, %eax
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512VL-32-NEXT: setb %cl
+; AVX512VL-32-NEXT: kmovw %ecx, %k1
+; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
+; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
+; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
+; AVX512VL-32-NEXT: wait
+; AVX512VL-32-NEXT: setae %al
+; AVX512VL-32-NEXT: shll $31, %eax
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; AVX512VL-32-NEXT: xorl %ecx, %ecx
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512VL-32-NEXT: setb %dl
+; AVX512VL-32-NEXT: kmovw %edx, %k1
+; AVX512VL-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512VL-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512VL-32-NEXT: flds (%esp)
+; AVX512VL-32-NEXT: fisttpll (%esp)
+; AVX512VL-32-NEXT: wait
+; AVX512VL-32-NEXT: setae %cl
+; AVX512VL-32-NEXT: shll $31, %ecx
+; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
+; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX512VL-32-NEXT: movl %ebp, %esp
+; AVX512VL-32-NEXT: popl %ebp
+; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4
+; AVX512VL-32-NEXT: retl
+;
+; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VL-64: # %bb.0:
+; AVX512VL-64-NEXT: vcvttss2usi 4(%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm0
+; AVX512VL-64-NEXT: vcvttss2usi (%rdi), %rax
+; AVX512VL-64-NEXT: vmovq %rax, %xmm1
+; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-64-NEXT: retq
+;
+; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512DQ-32: # %bb.0:
+; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
+; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-32-NEXT: vzeroupper
+; AVX512DQ-32-NEXT: retl
+;
+; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512DQ-64: # %bb.0:
+; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0
+; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-64-NEXT: vzeroupper
+; AVX512DQ-64-NEXT: retq
+;
+; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VLDQ-32: # %bb.0:
+; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512VLDQ-32-NEXT: vcvttps2uqq (%eax), %xmm0
+; AVX512VLDQ-32-NEXT: retl
+;
+; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
+; AVX512VLDQ-64: # %bb.0:
+; AVX512VLDQ-64-NEXT: vcvttps2uqq (%rdi), %xmm0
+; AVX512VLDQ-64-NEXT: retq
+ %a = load <4 x float>, <4 x float>* %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0
+ ret <2 x i64> %c
+}
+
define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32:
; SSE-32: # %bb.0:
@@ -1069,10 +1607,10 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-NEXT: comisd %xmm3, %xmm0
; SSE-32-NEXT: xorpd %xmm2, %xmm2
; SSE-32-NEXT: xorpd %xmm1, %xmm1
-; SSE-32-NEXT: jb .LBB5_2
+; SSE-32-NEXT: jb .LBB7_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movapd %xmm3, %xmm1
-; SSE-32-NEXT: .LBB5_2:
+; SSE-32-NEXT: .LBB7_2:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
@@ -1083,10 +1621,10 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT: comisd %xmm3, %xmm0
-; SSE-32-NEXT: jb .LBB5_4
+; SSE-32-NEXT: jb .LBB7_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm3, %xmm2
-; SSE-32-NEXT: .LBB5_4:
+; SSE-32-NEXT: .LBB7_4:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
@@ -1225,10 +1763,10 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; SSE-32-NEXT: comiss %xmm3, %xmm0
; SSE-32-NEXT: xorps %xmm2, %xmm2
; SSE-32-NEXT: xorps %xmm1, %xmm1
-; SSE-32-NEXT: jb .LBB7_2
+; SSE-32-NEXT: jb .LBB9_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm3, %xmm1
-; SSE-32-NEXT: .LBB7_2:
+; SSE-32-NEXT: .LBB9_2:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
@@ -1239,10 +1777,10 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: comiss %xmm3, %xmm0
-; SSE-32-NEXT: jb .LBB7_4
+; SSE-32-NEXT: jb .LBB9_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movaps %xmm3, %xmm2
-; SSE-32-NEXT: .LBB7_4:
+; SSE-32-NEXT: .LBB9_4:
; SSE-32-NEXT: setae %al
; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
@@ -1888,10 +2426,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: xorpd %xmm1, %xmm1
; SSE-32-NEXT: xorpd %xmm3, %xmm3
-; SSE-32-NEXT: jb .LBB17_2
+; SSE-32-NEXT: jb .LBB19_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movapd %xmm2, %xmm3
-; SSE-32-NEXT: .LBB17_2:
+; SSE-32-NEXT: .LBB19_2:
; SSE-32-NEXT: movapd %xmm0, %xmm4
; SSE-32-NEXT: subsd %xmm3, %xmm4
; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp)
@@ -1907,10 +2445,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT: comisd %xmm2, %xmm0
-; SSE-32-NEXT: jb .LBB17_4
+; SSE-32-NEXT: jb .LBB19_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm2, %xmm1
-; SSE-32-NEXT: .LBB17_4:
+; SSE-32-NEXT: .LBB19_4:
; SSE-32-NEXT: subsd %xmm1, %xmm0
; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
@@ -1947,10 +2485,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-64-NEXT: comisd %xmm3, %xmm0
; SSE-64-NEXT: xorpd %xmm2, %xmm2
; SSE-64-NEXT: xorpd %xmm1, %xmm1
-; SSE-64-NEXT: jb .LBB17_2
+; SSE-64-NEXT: jb .LBB19_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: movapd %xmm3, %xmm1
-; SSE-64-NEXT: .LBB17_2:
+; SSE-64-NEXT: .LBB19_2:
; SSE-64-NEXT: movapd %xmm0, %xmm4
; SSE-64-NEXT: subsd %xmm1, %xmm4
; SSE-64-NEXT: cvttsd2si %xmm4, %rax
@@ -1961,10 +2499,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT: comisd %xmm3, %xmm0
-; SSE-64-NEXT: jb .LBB17_4
+; SSE-64-NEXT: jb .LBB19_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: movapd %xmm3, %xmm2
-; SSE-64-NEXT: .LBB17_4:
+; SSE-64-NEXT: .LBB19_4:
; SSE-64-NEXT: subsd %xmm2, %xmm0
; SSE-64-NEXT: cvttsd2si %xmm0, %rax
; SSE-64-NEXT: setae %cl
@@ -1990,10 +2528,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
-; AVX-32-NEXT: jb .LBB17_2
+; AVX-32-NEXT: jb .LBB19_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vmovapd %xmm1, %xmm4
-; AVX-32-NEXT: .LBB17_2:
+; AVX-32-NEXT: .LBB19_2:
; AVX-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovsd %xmm3, (%esp)
; AVX-32-NEXT: fldl (%esp)
@@ -2004,10 +2542,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vcomisd %xmm1, %xmm0
-; AVX-32-NEXT: jb .LBB17_4
+; AVX-32-NEXT: jb .LBB19_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
-; AVX-32-NEXT: .LBB17_4:
+; AVX-32-NEXT: .LBB19_4:
; AVX-32-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fldl {{[0-9]+}}(%esp)
@@ -2032,10 +2570,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
-; AVX-64-NEXT: jb .LBB17_2
+; AVX-64-NEXT: jb .LBB19_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovapd %xmm1, %xmm3
-; AVX-64-NEXT: .LBB17_2:
+; AVX-64-NEXT: .LBB19_2:
; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
; AVX-64-NEXT: setae %cl
@@ -2045,10 +2583,10 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT: vcomisd %xmm1, %xmm0
-; AVX-64-NEXT: jb .LBB17_4
+; AVX-64-NEXT: jb .LBB19_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovapd %xmm1, %xmm2
-; AVX-64-NEXT: .LBB17_4:
+; AVX-64-NEXT: .LBB19_4:
; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
@@ -2264,10 +2802,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: comiss %xmm2, %xmm0
; SSE-32-NEXT: xorps %xmm1, %xmm1
; SSE-32-NEXT: xorps %xmm3, %xmm3
-; SSE-32-NEXT: jb .LBB19_2
+; SSE-32-NEXT: jb .LBB21_2
; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: .LBB19_2:
+; SSE-32-NEXT: .LBB21_2:
; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
@@ -2283,10 +2821,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-32-NEXT: comiss %xmm2, %xmm0
-; SSE-32-NEXT: jb .LBB19_4
+; SSE-32-NEXT: jb .LBB21_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movaps %xmm2, %xmm1
-; SSE-32-NEXT: .LBB19_4:
+; SSE-32-NEXT: .LBB21_4:
; SSE-32-NEXT: subss %xmm1, %xmm0
; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: setae %cl
@@ -2323,10 +2861,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-64-NEXT: comiss %xmm3, %xmm0
; SSE-64-NEXT: xorps %xmm2, %xmm2
; SSE-64-NEXT: xorps %xmm1, %xmm1
-; SSE-64-NEXT: jb .LBB19_2
+; SSE-64-NEXT: jb .LBB21_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: movaps %xmm3, %xmm1
-; SSE-64-NEXT: .LBB19_2:
+; SSE-64-NEXT: .LBB21_2:
; SSE-64-NEXT: movaps %xmm0, %xmm4
; SSE-64-NEXT: subss %xmm1, %xmm4
; SSE-64-NEXT: cvttss2si %xmm4, %rax
@@ -2337,10 +2875,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-64-NEXT: comiss %xmm3, %xmm0
-; SSE-64-NEXT: jb .LBB19_4
+; SSE-64-NEXT: jb .LBB21_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: movaps %xmm3, %xmm2
-; SSE-64-NEXT: .LBB19_4:
+; SSE-64-NEXT: .LBB21_4:
; SSE-64-NEXT: subss %xmm2, %xmm0
; SSE-64-NEXT: cvttss2si %xmm0, %rax
; SSE-64-NEXT: setae %cl
@@ -2366,10 +2904,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX-32-NEXT: jb .LBB19_2
+; AVX-32-NEXT: jb .LBB21_2
; AVX-32-NEXT: # %bb.1:
; AVX-32-NEXT: vmovaps %xmm1, %xmm4
-; AVX-32-NEXT: .LBB19_2:
+; AVX-32-NEXT: .LBB21_2:
; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
@@ -2380,10 +2918,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vcomiss %xmm1, %xmm0
-; AVX-32-NEXT: jb .LBB19_4
+; AVX-32-NEXT: jb .LBB21_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
-; AVX-32-NEXT: .LBB19_4:
+; AVX-32-NEXT: .LBB21_4:
; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-32-NEXT: vmovss %xmm0, (%esp)
; AVX-32-NEXT: flds (%esp)
@@ -2408,10 +2946,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX-64-NEXT: jb .LBB19_2
+; AVX-64-NEXT: jb .LBB21_2
; AVX-64-NEXT: # %bb.1:
; AVX-64-NEXT: vmovaps %xmm1, %xmm3
-; AVX-64-NEXT: .LBB19_2:
+; AVX-64-NEXT: .LBB21_2:
; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rax
; AVX-64-NEXT: setae %cl
@@ -2421,10 +2959,10 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-64-NEXT: vcomiss %xmm1, %xmm0
-; AVX-64-NEXT: jb .LBB19_4
+; AVX-64-NEXT: jb .LBB21_4
; AVX-64-NEXT: # %bb.3:
; AVX-64-NEXT: vmovaps %xmm1, %xmm2
-; AVX-64-NEXT: .LBB19_4:
+; AVX-64-NEXT: .LBB21_4:
; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vcvttss2si %xmm0, %rax
; AVX-64-NEXT: setae %cl
More information about the llvm-commits
mailing list