[llvm] [X86][SATCVT] Reduce MIN/MAXSS/D by conversion instruction result (PR #136471)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 19 21:48:59 PDT 2025
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/136471
>From c0adfe61325445308cc6d5fe08b019778e9f6cf7 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sun, 20 Apr 2025 11:15:34 +0800
Subject: [PATCH] [X86][SATCVT] Reduce MIN/MAXSS/D by conversion instruction
result
CVTSD2SI returns INT_MIN/LONG_MIN when underflow happens.
VCVTPS/D2UD/QQ returns UINT_MAX/ULONG_MAX when overflow happens.
We can eliminate one MIN/MAXSS/D instruction by leveraging the result of the conversion instruction.
Partially fixes #136342
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 28 +-
llvm/test/CodeGen/X86/fpclamptosat.ll | 2 -
llvm/test/CodeGen/X86/fptosi-sat-scalar.ll | 2 -
.../test/CodeGen/X86/fptosi-sat-vector-128.ll | 9 +-
llvm/test/CodeGen/X86/fptoui-sat-scalar.ll | 611 ++++++++++++++++++
5 files changed, 635 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a4381b99dbae0..961d01a713cd4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21851,6 +21851,15 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
"Expected saturation width smaller than result width");
+ // AVX512 provides VCVTPS/D2UD/QQ which return INT_MAX/LONG_MAX when overflow
+ // happens. X86ISD::FMAX makes sure negative value and NaN return 0.
+ if (Subtarget.hasAVX512() && !IsSigned && SatWidth == DstWidth &&
+ (DstVT == MVT::i32 || (Subtarget.is64Bit() && DstVT == MVT::i64))) {
+ SDValue MinFloatNode = DAG.getConstantFP(0.0, dl, SrcVT);
+ SDValue Clamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ return DAG.getNode(ISD::FP_TO_UINT, dl, DstVT, Clamped);
+ }
+
// Promote result of FP_TO_*INT to at least 32 bits.
if (TmpWidth < 32) {
TmpVT = MVT::i32;
@@ -21912,14 +21921,19 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
}
- // Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
- SDValue MinClamped = DAG.getNode(
- X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
- // Clamp by MaxFloat from above. NaN cannot occur.
- SDValue BothClamped = DAG.getNode(
- X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
+ SDValue MinClamped = Src;
+ // If Src is NaN, the result is MaxFloat.
+ unsigned MinOpc = X86ISD::FMIN; // If Src is NaN, the result is MaxFloat.
+ if (!IsSigned || SatWidth != DstWidth) {
+ // Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
+ MinClamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
+ // NaN cannot occur.
+ MinOpc = X86ISD::FMINC;
+ }
+ // Clamp by MaxFloat from above.
+ SDValue Clamped = DAG.getNode(MinOpc, dl, SrcVT, MinClamped, MaxFloatNode);
// Convert clamped value to integer.
- SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
+ SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, Clamped);
if (!IsSigned) {
// In the unsigned case we're done, because we mapped NaN to MinFloat,
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 3f5ec7b530fe0..580aac28490b9 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -8,7 +8,6 @@ define i32 @stest_f64i32(double %x) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
-; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
; CHECK-NEXT: cmovnpl %ecx, %eax
@@ -621,7 +620,6 @@ define i32 @stest_f64i32_mm(double %x) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
-; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
; CHECK-NEXT: cmovnpl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 85f4c945230e1..c726845bf98c0 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -1455,7 +1455,6 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
-; X86-SSE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
@@ -1465,7 +1464,6 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
-; X64-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index 536a1ae3b918d..262e252cbe5fa 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -418,11 +418,9 @@ define <2 x i16> @test_signed_v2i16_v2f64(<2 x double> %f) nounwind {
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %f) nounwind {
; CHECK-LABEL: test_signed_v2i32_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm2 = [-2.147483648E+9,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = [2.147483647E+9,0.0E+0]
; CHECK-NEXT: movapd %xmm0, %xmm1
-; CHECK-NEXT: maxsd %xmm2, %xmm1
-; CHECK-NEXT: movsd {{.*#+}} xmm3 = [2.147483647E+9,0.0E+0]
-; CHECK-NEXT: minsd %xmm3, %xmm1
+; CHECK-NEXT: minsd %xmm2, %xmm1
; CHECK-NEXT: cvttsd2si %xmm1, %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: ucomisd %xmm0, %xmm0
@@ -430,8 +428,7 @@ define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %f) nounwind {
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: ucomisd %xmm0, %xmm0
-; CHECK-NEXT: maxsd %xmm2, %xmm0
-; CHECK-NEXT: minsd %xmm3, %xmm0
+; CHECK-NEXT: minsd %xmm2, %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: cmovpl %ecx, %eax
; CHECK-NEXT: movd %eax, %xmm0
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index 47dc3ca3616ea..931a1f161cdb7 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=i686-linux | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
;
; 32-bit float to unsigned integer
@@ -77,6 +78,16 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i1 @llvm.fptoui.sat.i1.f32(float %f)
ret i1 %x
}
@@ -140,6 +151,16 @@ define i8 @test_unsigned_i8_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i8_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [2.55E+2,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i8 @llvm.fptoui.sat.i8.f32(float %f)
ret i8 %x
}
@@ -202,6 +223,16 @@ define i13 @test_unsigned_i13_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i13_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [8.191E+3,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i13 @llvm.fptoui.sat.i13.f32(float %f)
ret i13 %x
}
@@ -264,6 +295,16 @@ define i16 @test_unsigned_i16_f32(float %f) nounwind {
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i16_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = [6.5535E+4,0.0E+0,0.0E+0,0.0E+0]
+; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i16 @llvm.fptoui.sat.i16.f32(float %f)
ret i16 %x
}
@@ -323,6 +364,14 @@ define i19 @test_unsigned_i19_f32(float %f) nounwind {
; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i19_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f32(float %f)
ret i19 %x
}
@@ -397,6 +446,13 @@ define i32 @test_unsigned_i32_f32(float %f) nounwind {
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i32_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i32 @llvm.fptoui.sat.i32.f32(float %f)
ret i32 %x
}
@@ -523,6 +579,18 @@ define i50 @test_unsigned_i50_f32(float %f) nounwind {
; X64-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i50_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvttss2si %xmm0, %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovaeq %rax, %rcx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
+; AVX512-NEXT: cmovbeq %rcx, %rax
+; AVX512-NEXT: retq
%x = call i50 @llvm.fptoui.sat.i50.f32(float %f)
ret i50 %x
}
@@ -652,6 +720,13 @@ define i64 @test_unsigned_i64_f32(float %f) nounwind {
; X64-NEXT: movq $-1, %rax
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i64_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: retq
%x = call i64 @llvm.fptoui.sat.i64.f32(float %f)
ret i64 %x
}
@@ -796,6 +871,26 @@ define i100 @test_unsigned_i100_f32(float %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i100_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX512-NEXT: callq __fixunssfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX512-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: movabsq $68719476735, %rcx # imm = 0xFFFFFFFFF
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i100 @llvm.fptoui.sat.i100.f32(float %f)
ret i100 %x
}
@@ -934,6 +1029,25 @@ define i128 @test_unsigned_i128_f32(float %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i128_f32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; AVX512-NEXT: callq __fixunssfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero
+; AVX512-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i128 @llvm.fptoui.sat.i128.f32(float %f)
ret i128 %x
}
@@ -1012,6 +1126,16 @@ define i1 @test_unsigned_i1_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i1 @llvm.fptoui.sat.i1.f64(double %f)
ret i1 %x
}
@@ -1075,6 +1199,16 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i8_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [2.55E+2,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i8 @llvm.fptoui.sat.i8.f64(double %f)
ret i8 %x
}
@@ -1137,6 +1271,16 @@ define i13 @test_unsigned_i13_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i13_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [8.191E+3,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i13 @llvm.fptoui.sat.i13.f64(double %f)
ret i13 %x
}
@@ -1199,6 +1343,16 @@ define i16 @test_unsigned_i16_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i16_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmovsd {{.*#+}} xmm1 = [6.5535E+4,0.0E+0]
+; AVX512-NEXT: vminsd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i16 @llvm.fptoui.sat.i16.f64(double %f)
ret i16 %x
}
@@ -1258,6 +1412,14 @@ define i19 @test_unsigned_i19_f64(double %f) nounwind {
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i19_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f64(double %f)
ret i19 %x
}
@@ -1325,6 +1487,13 @@ define i32 @test_unsigned_i32_f64(double %f) nounwind {
; X64-NEXT: cvttsd2si %xmm0, %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i32_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
+; AVX512-NEXT: retq
%x = call i32 @llvm.fptoui.sat.i32.f64(double %f)
ret i32 %x
}
@@ -1447,6 +1616,14 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind {
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i50_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2si %xmm0, %rax
+; AVX512-NEXT: retq
%x = call i50 @llvm.fptoui.sat.i50.f64(double %f)
ret i50 %x
}
@@ -1576,6 +1753,13 @@ define i64 @test_unsigned_i64_f64(double %f) nounwind {
; X64-NEXT: movq $-1, %rax
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i64_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcvttsd2usi %xmm0, %rax
+; AVX512-NEXT: retq
%x = call i64 @llvm.fptoui.sat.i64.f64(double %f)
ret i64 %x
}
@@ -1720,6 +1904,26 @@ define i100 @test_unsigned_i100_f64(double %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i100_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; AVX512-NEXT: callq __fixunsdfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovsd (%rsp), %xmm1 # 8-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero
+; AVX512-NEXT: vucomisd %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: movabsq $68719476735, %rcx # imm = 0xFFFFFFFFF
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i100 @llvm.fptoui.sat.i100.f64(double %f)
ret i100 %x
}
@@ -1858,6 +2062,25 @@ define i128 @test_unsigned_i128_f64(double %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i128_f64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: pushq %rax
+; AVX512-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
+; AVX512-NEXT: callq __fixunsdfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovsd (%rsp), %xmm1 # 8-byte Reload
+; AVX512-NEXT: # xmm1 = mem[0],zero
+; AVX512-NEXT: vucomisd %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: popq %rcx
+; AVX512-NEXT: retq
%x = call i128 @llvm.fptoui.sat.i128.f64(double %f)
ret i128 %x
}
@@ -1955,6 +2178,20 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movl $1, %eax
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i1 @llvm.fptoui.sat.i1.f16(half %f)
ret i1 %x
}
@@ -2037,6 +2274,20 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind {
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i8_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movl $255, %eax
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i8 @llvm.fptoui.sat.i8.f16(half %f)
ret i8 %x
}
@@ -2118,6 +2369,20 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i13_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movl $8191, %eax # imm = 0x1FFF
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i13 @llvm.fptoui.sat.i13.f16(half %f)
ret i13 %x
}
@@ -2199,6 +2464,20 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i16_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2si %xmm0, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movl $65535, %eax # imm = 0xFFFF
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i16 @llvm.fptoui.sat.i16.f16(half %f)
ret i16 %x
}
@@ -2285,6 +2564,19 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i19_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movl $524287, %eax # imm = 0x7FFFF
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f16(half %f)
ret i19 %x
}
@@ -2371,6 +2663,19 @@ define i32 @test_unsigned_i32_f16(half %f) nounwind {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i32_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movl $-1, %eax
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: retq
%x = call i32 @llvm.fptoui.sat.i32.f16(half %f)
ret i32 %x
}
@@ -2514,6 +2819,19 @@ define i50 @test_unsigned_i50_f16(half %f) nounwind {
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i50_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovaeq %rax, %rcx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
+; AVX512-NEXT: cmovbeq %rcx, %rax
+; AVX512-NEXT: retq
%x = call i50 @llvm.fptoui.sat.i50.f16(half %f)
ret i50 %x
}
@@ -2653,6 +2971,19 @@ define i64 @test_unsigned_i64_f16(half %f) nounwind {
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i64_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vcvttss2usi %xmm0, %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vucomiss %xmm1, %xmm0
+; AVX512-NEXT: cmovaeq %rax, %rcx
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX512-NEXT: movq $-1, %rax
+; AVX512-NEXT: cmovbeq %rcx, %rax
+; AVX512-NEXT: retq
%x = call i64 @llvm.fptoui.sat.i64.f16(half %f)
ret i64 %x
}
@@ -2807,6 +3138,26 @@ define i100 @test_unsigned_i100_f16(half %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i100_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $24, %rsp
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: callq __fixunssfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: movabsq $68719476735, %rcx # imm = 0xFFFFFFFFF
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: addq $24, %rsp
+; AVX512-NEXT: retq
%x = call i100 @llvm.fptoui.sat.i100.f16(half %f)
ret i100 %x
}
@@ -2955,6 +3306,25 @@ define i128 @test_unsigned_i128_f16(half %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: popq %rcx
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i128_f16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $24, %rsp
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: callq __fixunssfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX512-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: addq $24, %rsp
+; AVX512-NEXT: retq
%x = call i128 @llvm.fptoui.sat.i128.f16(half %f)
ret i128 %x
}
@@ -3067,6 +3437,27 @@ define i1 @test_unsigned_i1_f80(x86_fp80 %f) nounwind {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i1_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fisttps -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: fld1
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movl $1, %eax
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i1 @llvm.fptoui.sat.i1.f80(x86_fp80 %f)
ret i1 %x
}
@@ -3164,6 +3555,27 @@ define i8 @test_unsigned_i8_f80(x86_fp80 %f) nounwind {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i8_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fisttps -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: cmovael %eax, %ecx
+; AVX512-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movl $255, %eax
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $al killed $al killed $eax
+; AVX512-NEXT: retq
%x = call i8 @llvm.fptoui.sat.i8.f80(x86_fp80 %f)
ret i8 %x
}
@@ -3264,6 +3676,29 @@ define i13 @test_unsigned_i13_f80(x86_fp80 %f) nounwind {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i13_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fisttpl -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: jb .LBB32_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; AVX512-NEXT: .LBB32_2:
+; AVX512-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movl $8191, %eax # imm = 0x1FFF
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i13 @llvm.fptoui.sat.i13.f80(x86_fp80 %f)
ret i13 %x
}
@@ -3364,6 +3799,29 @@ define i16 @test_unsigned_i16_f80(x86_fp80 %f) nounwind {
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i16_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fisttpl -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: jb .LBB33_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; AVX512-NEXT: .LBB33_2:
+; AVX512-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movl $65535, %eax # imm = 0xFFFF
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
+; AVX512-NEXT: retq
%x = call i16 @llvm.fptoui.sat.i16.f80(x86_fp80 %f)
ret i16 %x
}
@@ -3464,6 +3922,28 @@ define i19 @test_unsigned_i19_f80(x86_fp80 %f) nounwind {
; X64-NEXT: movl $524287, %eax # imm = 0x7FFFF
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i19_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fisttpll -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: jb .LBB34_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; AVX512-NEXT: .LBB34_2:
+; AVX512-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movl $524287, %eax # imm = 0x7FFFF
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f80(x86_fp80 %f)
ret i19 %x
}
@@ -3564,6 +4044,28 @@ define i32 @test_unsigned_i32_f80(x86_fp80 %f) nounwind {
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovbel %ecx, %eax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i32_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fisttpll -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: jb .LBB35_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
+; AVX512-NEXT: .LBB35_2:
+; AVX512-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movl $-1, %eax
+; AVX512-NEXT: cmovbel %ecx, %eax
+; AVX512-NEXT: retq
%x = call i32 @llvm.fptoui.sat.i32.f80(x86_fp80 %f)
ret i32 %x
}
@@ -3717,6 +4219,35 @@ define i50 @test_unsigned_i50_f80(x86_fp80 %f) nounwind {
; X64-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i50_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fcmovbe %st(2), %st
+; AVX512-NEXT: fstp %st(2)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fsubr %st(2), %st
+; AVX512-NEXT: fisttpll -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: setbe %al
+; AVX512-NEXT: shlq $63, %rax
+; AVX512-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: cmovaeq %rax, %rcx
+; AVX512-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movabsq $1125899906842623, %rax # imm = 0x3FFFFFFFFFFFF
+; AVX512-NEXT: cmovbeq %rcx, %rax
+; AVX512-NEXT: retq
%x = call i50 @llvm.fptoui.sat.i50.f80(x86_fp80 %f)
ret i50 %x
}
@@ -3868,6 +4399,35 @@ define i64 @test_unsigned_i64_f80(x86_fp80 %f) nounwind {
; X64-NEXT: movq $-1, %rax
; X64-NEXT: cmovbeq %rcx, %rax
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i64_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fcmovbe %st(2), %st
+; AVX512-NEXT: fstp %st(2)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fsubr %st(2), %st
+; AVX512-NEXT: fisttpll -{{[0-9]+}}(%rsp)
+; AVX512-NEXT: setbe %al
+; AVX512-NEXT: shlq $63, %rax
+; AVX512-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: cmovaeq %rax, %rcx
+; AVX512-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movq $-1, %rax
+; AVX512-NEXT: cmovbeq %rcx, %rax
+; AVX512-NEXT: retq
%x = call i64 @llvm.fptoui.sat.i64.f80(x86_fp80 %f)
ret i64 %x
}
@@ -4026,6 +4586,32 @@ define i100 @test_unsigned_i100_f80(x86_fp80 %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i100_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $40, %rsp
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
+; AVX512-NEXT: fstpt (%rsp)
+; AVX512-NEXT: callq __fixunsxfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: movabsq $68719476735, %rcx # imm = 0xFFFFFFFFF
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: addq $40, %rsp
+; AVX512-NEXT: retq
%x = call i100 @llvm.fptoui.sat.i100.f80(x86_fp80 %f)
ret i100 %x
}
@@ -4178,6 +4764,31 @@ define i128 @test_unsigned_i128_f80(x86_fp80 %f) nounwind {
; X64-NEXT: cmovaq %rcx, %rdx
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
+;
+; AVX512-LABEL: test_unsigned_i128_f80:
+; AVX512: # %bb.0:
+; AVX512-NEXT: subq $40, %rsp
+; AVX512-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512-NEXT: fld %st(0)
+; AVX512-NEXT: fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
+; AVX512-NEXT: fstpt (%rsp)
+; AVX512-NEXT: callq __fixunsxfti@PLT
+; AVX512-NEXT: xorl %ecx, %ecx
+; AVX512-NEXT: fldz
+; AVX512-NEXT: fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
+; AVX512-NEXT: fucomi %st(1), %st
+; AVX512-NEXT: fstp %st(1)
+; AVX512-NEXT: cmovbq %rcx, %rdx
+; AVX512-NEXT: cmovbq %rcx, %rax
+; AVX512-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; AVX512-NEXT: fxch %st(1)
+; AVX512-NEXT: fucompi %st(1), %st
+; AVX512-NEXT: fstp %st(0)
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: addq $40, %rsp
+; AVX512-NEXT: retq
%x = call i128 @llvm.fptoui.sat.i128.f80(x86_fp80 %f)
ret i128 %x
}
More information about the llvm-commits
mailing list