[llvm] r330437 - [DAGCombine] (float)((int) f) --> ftrunc (PR36617)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 20 08:07:55 PDT 2018
Author: spatel
Date: Fri Apr 20 08:07:55 2018
New Revision: 330437
URL: http://llvm.org/viewvc/llvm-project?rev=330437&view=rev
Log:
[DAGCombine] (float)((int) f) --> ftrunc (PR36617)
This was originally committed at rL328921 and reverted at rL329920 to
investigate failures in Chrome. This time I've added a note to the
ReleaseNotes warning users that this change can expose UB in their code,
and I'm repeating that note here for more exposure:
Optimization of floating-point casts is improved. This may cause surprising
results for code that is relying on undefined behavior. Code sanitizers can
be used to detect affected patterns such as this:
int main() {
float x = 4294967296.0f;
x = (float)((int)x);
printf("junk in the ftrunc: %f\n", x);
return 0;
}
$ clang -O1 ftrunc.c -fsanitize=undefined ; ./a.out
ftrunc.c:5:15: runtime error: 4.29497e+09 is outside the range of
representable values of type 'int'
junk in the ftrunc: 0.000000
Original commit message:
fptosi / fptoui round towards zero, and that's the same behavior as ISD::FTRUNC,
so replace a pair of casts with the equivalent node. We don't have to account for
special cases (NaN, INF) because out-of-range casts are undefined.
Differential Revision: https://reviews.llvm.org/D44909
Added:
llvm/trunk/test/CodeGen/ARM/ftrunc.ll
- copied unchanged from r329919, llvm/trunk/test/CodeGen/ARM/ftrunc.ll
Modified:
llvm/trunk/docs/ReleaseNotes.rst
llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/AArch64/ftrunc.ll
llvm/trunk/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
llvm/trunk/test/CodeGen/PowerPC/ftrunc-vec.ll
llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
llvm/trunk/test/CodeGen/X86/ftrunc.ll
Modified: llvm/trunk/docs/ReleaseNotes.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/ReleaseNotes.rst?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/docs/ReleaseNotes.rst (original)
+++ llvm/trunk/docs/ReleaseNotes.rst Fri Apr 20 08:07:55 2018
@@ -61,6 +61,26 @@ Non-comprehensive list of changes in thi
* The optimization flag to merge constants (-fmerge-all-constants) is no longer
applied by default.
+* Optimization of floating-point casts is improved. This may cause surprising
+ results for code that is relying on undefined behavior. Code sanitizers can
+ be used to detect affected patterns such as this:
+
+.. code-block:: c
+
+ int main() {
+ float x = 4294967296.0f;
+ x = (float)((int)x);
+ printf("junk in the ftrunc: %f\n", x);
+ return 0;
+ }
+
+.. code-block:: bash
+
+ clang -O1 ftrunc.c -fsanitize=undefined ; ./a.out
+ ftrunc.c:5:15: runtime error: 4.29497e+09 is outside the range of representable values of type 'int'
+ junk in the ftrunc: 0.000000
+
+
* Note..
.. NOTE
Modified: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h Fri Apr 20 08:07:55 2018
@@ -495,7 +495,8 @@ namespace ISD {
ZERO_EXTEND_VECTOR_INREG,
/// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned
- /// integer.
+ /// integer. These have the same semantics as fptosi and fptoui in IR. If
+ /// the FP value cannot fit in the integer type, the results are undefined.
FP_TO_SINT,
FP_TO_UINT,
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Apr 20 08:07:55 2018
@@ -10890,6 +10890,15 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDN
}
}
+ // fptosi rounds towards zero, so converting from FP to integer and back is
+ // the same as an 'ftrunc': sitofp (fptosi X) --> ftrunc X
+ // We only do this if the target has legal ftrunc, otherwise we'd likely be
+ // replacing casts with a libcall.
+ if (N0.getOpcode() == ISD::FP_TO_SINT &&
+ N0.getOperand(0).getValueType() == VT &&
+ TLI.isOperationLegal(ISD::FTRUNC, VT))
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
return SDValue();
}
@@ -10929,6 +10938,15 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDN
}
}
+ // fptoui rounds towards zero, so converting from FP to integer and back is
+ // the same as an 'ftrunc': uitofp (fptoui X) --> ftrunc X
+ // We only do this if the target has legal ftrunc, otherwise we'd likely be
+ // replacing casts with a libcall.
+ if (N0.getOpcode() == ISD::FP_TO_UINT &&
+ N0.getOperand(0).getValueType() == VT &&
+ TLI.isOperationLegal(ISD::FTRUNC, VT))
+ return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/AArch64/ftrunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ftrunc.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/ftrunc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/ftrunc.ll Fri Apr 20 08:07:55 2018
@@ -4,8 +4,7 @@
define float @trunc_unsigned_f32(float %x) {
; CHECK-LABEL: trunc_unsigned_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: ret
%i = fptoui float %x to i32
%r = uitofp i32 %i to float
@@ -15,8 +14,7 @@ define float @trunc_unsigned_f32(float %
define double @trunc_unsigned_f64(double %x) {
; CHECK-LABEL: trunc_unsigned_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: frintz d0, d0
; CHECK-NEXT: ret
%i = fptoui double %x to i64
%r = uitofp i64 %i to double
@@ -26,8 +24,7 @@ define double @trunc_unsigned_f64(double
define float @trunc_signed_f32(float %x) {
; CHECK-LABEL: trunc_signed_f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs w8, s0
-; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: frintz s0, s0
; CHECK-NEXT: ret
%i = fptosi float %x to i32
%r = sitofp i32 %i to float
@@ -37,8 +34,7 @@ define float @trunc_signed_f32(float %x)
define double @trunc_signed_f64(double %x) {
; CHECK-LABEL: trunc_signed_f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: scvtf d0, x8
+; CHECK-NEXT: frintz d0, d0
; CHECK-NEXT: ret
%i = fptosi double %x to i64
%r = sitofp i64 %i to double
Modified: llvm/trunk/test/CodeGen/PowerPC/fp-int128-fp-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fp-int128-fp-combine.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fp-int128-fp-combine.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fp-int128-fp-combine.ll Fri Apr 20 08:07:55 2018
@@ -5,18 +5,7 @@
define float @f_i128_f(float %v) {
; CHECK-LABEL: f_i128_f:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: std 0, 16(1)
-; CHECK-NEXT: stdu 1, -32(1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: nop
-; CHECK-NEXT: bl __floattisf
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi 1, 1, 32
-; CHECK-NEXT: ld 0, 16(1)
-; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: friz 1, 1
; CHECK-NEXT: blr
entry:
%a = fptosi float %v to i128
Modified: llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fp-to-int-to-fp.ll Fri Apr 20 08:07:55 2018
@@ -11,8 +11,7 @@ entry:
ret float %conv1
; FPCVT-LABEL: @fool
-; FPCVT: fctidz [[REG1:[0-9]+]], 1
-; FPCVT: fcfids 1, [[REG1]]
+; FPCVT: friz 1, 1
; FPCVT: blr
; PPC64-LABEL: @fool
@@ -30,8 +29,7 @@ entry:
ret double %conv1
; FPCVT-LABEL: @foodl
-; FPCVT: fctidz [[REG1:[0-9]+]], 1
-; FPCVT: fcfid 1, [[REG1]]
+; FPCVT: friz 1, 1
; FPCVT: blr
; PPC64-LABEL: @foodl
@@ -48,8 +46,7 @@ entry:
ret float %conv1
; FPCVT-LABEL: @fooul
-; FPCVT: fctiduz [[REG1:[0-9]+]], 1
-; FPCVT: fcfidus 1, [[REG1]]
+; FPCVT: friz 1, 1
; FPCVT: blr
}
@@ -61,8 +58,7 @@ entry:
ret double %conv1
; FPCVT-LABEL: @fooudl
-; FPCVT: fctiduz [[REG1:[0-9]+]], 1
-; FPCVT: fcfidu 1, [[REG1]]
+; FPCVT: friz 1, 1
; FPCVT: blr
}
Modified: llvm/trunk/test/CodeGen/PowerPC/ftrunc-vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ftrunc-vec.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ftrunc-vec.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ftrunc-vec.ll Fri Apr 20 08:07:55 2018
@@ -4,8 +4,7 @@
define <4 x float> @truncf32(<4 x float> %a) {
; CHECK-LABEL: truncf32:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvcvspsxws 0, 34
-; CHECK-NEXT: xvcvsxwsp 34, 0
+; CHECK-NEXT: xvrspiz 34, 34
; CHECK-NEXT: blr
%t0 = fptosi <4 x float> %a to <4 x i32>
%t1 = sitofp <4 x i32> %t0 to <4 x float>
@@ -15,8 +14,7 @@ define <4 x float> @truncf32(<4 x float>
define <2 x double> @truncf64(<2 x double> %a) {
; CHECK-LABEL: truncf64:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvcvdpsxds 34, 34
-; CHECK-NEXT: xvcvsxddp 34, 34
+; CHECK-NEXT: xvrdpiz 34, 34
; CHECK-NEXT: blr
%t0 = fptosi <2 x double> %a to <2 x i64>
%t1 = sitofp <2 x i64> %t0 to <2 x double>
@@ -26,8 +24,7 @@ define <2 x double> @truncf64(<2 x doubl
define <4 x float> @truncf32u(<4 x float> %a) {
; CHECK-LABEL: truncf32u:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvcvspuxws 0, 34
-; CHECK-NEXT: xvcvuxwsp 34, 0
+; CHECK-NEXT: xvrspiz 34, 34
; CHECK-NEXT: blr
%t0 = fptoui <4 x float> %a to <4 x i32>
%t1 = uitofp <4 x i32> %t0 to <4 x float>
@@ -37,8 +34,7 @@ define <4 x float> @truncf32u(<4 x float
define <2 x double> @truncf64u(<2 x double> %a) {
; CHECK-LABEL: truncf64u:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvcvdpuxds 34, 34
-; CHECK-NEXT: xvcvuxddp 34, 34
+; CHECK-NEXT: xvrdpiz 34, 34
; CHECK-NEXT: blr
%t0 = fptoui <2 x double> %a to <2 x i64>
%t1 = uitofp <2 x i64> %t0 to <2 x double>
Modified: llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll Fri Apr 20 08:07:55 2018
@@ -36,11 +36,7 @@ entry:
ret float %conv1
; CHECK-LABEL: @foo
-; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
-; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
-; CHECK: stfiwx [[REG2]], 0, [[REG1]]
-; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
-; CHECK: fcfids 1, [[REG3]]
+; CHECK: friz 1, 1
; CHECK: blr
}
@@ -52,11 +48,7 @@ entry:
ret double %conv1
; CHECK-LABEL: @food
-; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
-; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
-; CHECK: stfiwx [[REG2]], 0, [[REG1]]
-; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
-; CHECK: fcfid 1, [[REG3]]
+; CHECK: friz 1, 1
; CHECK: blr
}
@@ -68,11 +60,7 @@ entry:
ret float %conv1
; CHECK-LABEL: @foou
-; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
-; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
-; CHECK: stfiwx [[REG2]], 0, [[REG1]]
-; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
-; CHECK: fcfidus 1, [[REG3]]
+; CHECK: friz 1, 1
; CHECK: blr
}
@@ -84,11 +72,7 @@ entry:
ret double %conv1
; CHECK-LABEL: @fooud
-; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
-; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
-; CHECK: stfiwx [[REG2]], 0, [[REG1]]
-; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
-; CHECK: fcfidu 1, [[REG3]]
+; CHECK: friz 1, 1
; CHECK: blr
}
Modified: llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll Fri Apr 20 08:07:55 2018
@@ -71,8 +71,7 @@ define void @full_test() {
; X32-NEXT: subl $60, %esp
; X32-NEXT: .cfi_def_cfa_offset 64
; X32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; X32-NEXT: cvttps2dq %xmm2, %xmm0
-; X32-NEXT: cvtdq2ps %xmm0, %xmm1
+; X32-NEXT: roundps $11, %xmm2, %xmm1
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: cmpltps %xmm2, %xmm0
; X32-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u>
@@ -93,8 +92,7 @@ define void @full_test() {
; X64-LABEL: full_test:
; X64: # %bb.0: # %entry
; X64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; X64-NEXT: cvttps2dq %xmm2, %xmm0
-; X64-NEXT: cvtdq2ps %xmm0, %xmm1
+; X64-NEXT: roundps $11, %xmm2, %xmm1
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: cmpltps %xmm2, %xmm0
; X64-NEXT: movaps {{.*#+}} xmm3 = <1,1,u,u>
Modified: llvm/trunk/test/CodeGen/X86/ftrunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ftrunc.ll?rev=330437&r1=330436&r2=330437&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ftrunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ftrunc.ll Fri Apr 20 08:07:55 2018
@@ -14,17 +14,12 @@ define float @trunc_unsigned_f32(float %
;
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: cvttss2si %xmm0, %rax
-; SSE41-NEXT: movl %eax, %eax
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE41-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_unsigned_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: movl %eax, %eax
-; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm0
+; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptoui float %x to i32
%r = uitofp i32 %i to float
@@ -52,35 +47,12 @@ define double @trunc_unsigned_f64(double
;
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE41-NEXT: movapd %xmm0, %xmm2
-; SSE41-NEXT: subsd %xmm1, %xmm2
-; SSE41-NEXT: cvttsd2si %xmm2, %rax
-; SSE41-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE41-NEXT: xorq %rax, %rcx
-; SSE41-NEXT: cvttsd2si %xmm0, %rax
-; SSE41-NEXT: ucomisd %xmm1, %xmm0
-; SSE41-NEXT: cmovaeq %rcx, %rax
-; SSE41-NEXT: movq %rax, %xmm0
-; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; SSE41-NEXT: subpd {{.*}}(%rip), %xmm0
-; SSE41-NEXT: haddpd %xmm0, %xmm0
+; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_unsigned_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX1-NEXT: xorq %rax, %rcx
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-NEXT: cmovaeq %rcx, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; AVX1-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptoui double %x to i64
%r = uitofp i64 %i to double
@@ -118,45 +90,12 @@ define <4 x float> @trunc_unsigned_v4f32
;
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: cvttss2si %xmm1, %rax
-; SSE41-NEXT: cvttss2si %xmm0, %rcx
-; SSE41-NEXT: movd %ecx, %xmm1
-; SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
-; SSE41-NEXT: cvttss2si %xmm2, %rax
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE41-NEXT: cvttss2si %xmm0, %rax
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [1258291200,1258291200,1258291200,1258291200]
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
-; SSE41-NEXT: psrld $16, %xmm1
-; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2],mem[3],xmm1[4],mem[5],xmm1[6],mem[7]
-; SSE41-NEXT: addps {{.*}}(%rip), %xmm1
-; SSE41-NEXT: addps %xmm0, %xmm1
-; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_unsigned_v4f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vcvttss2si %xmm0, %rcx
-; AVX1-NEXT: vmovd %ecx, %xmm1
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptoui <4 x float> %x to <4 x i32>
%r = uitofp <4 x i32> %i to <4 x float>
@@ -201,61 +140,12 @@ define <2 x double> @trunc_unsigned_v2f6
;
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE41-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: subsd %xmm2, %xmm3
-; SSE41-NEXT: cvttsd2si %xmm3, %rax
-; SSE41-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE41-NEXT: xorq %rcx, %rax
-; SSE41-NEXT: cvttsd2si %xmm1, %rdx
-; SSE41-NEXT: ucomisd %xmm2, %xmm1
-; SSE41-NEXT: cmovaeq %rax, %rdx
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: subsd %xmm2, %xmm1
-; SSE41-NEXT: cvttsd2si %xmm1, %rax
-; SSE41-NEXT: xorq %rcx, %rax
-; SSE41-NEXT: cvttsd2si %xmm0, %rcx
-; SSE41-NEXT: ucomisd %xmm2, %xmm0
-; SSE41-NEXT: cmovaeq %rax, %rcx
-; SSE41-NEXT: movq %rcx, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE41-NEXT: movapd {{.*#+}} xmm2 = [4.503600e+15,1.934281e+25]
-; SSE41-NEXT: subpd %xmm2, %xmm0
-; SSE41-NEXT: movq %rdx, %xmm3
-; SSE41-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; SSE41-NEXT: subpd %xmm2, %xmm3
-; SSE41-NEXT: haddpd %xmm3, %xmm0
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_unsigned_v2f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vcvttsd2si %xmm3, %rax
-; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttsd2si %xmm1, %rdx
-; AVX1-NEXT: vucomisd %xmm2, %xmm1
-; AVX1-NEXT: cmovaeq %rax, %rdx
-; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: xorq %rcx, %rax
-; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
-; AVX1-NEXT: vucomisd %xmm2, %xmm0
-; AVX1-NEXT: cmovaeq %rax, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX1-NEXT: vmovapd {{.*#+}} xmm2 = [4.503600e+15,1.934281e+25]
-; AVX1-NEXT: vsubpd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovq %rdx, %xmm3
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; AVX1-NEXT: vsubpd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptoui <2 x double> %x to <2 x i64>
%r = uitofp <2 x i64> %i to <2 x double>
@@ -327,106 +217,13 @@ define <4 x double> @trunc_unsigned_v4f6
;
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
-; SSE41-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE41-NEXT: movaps %xmm3, %xmm4
-; SSE41-NEXT: subsd %xmm2, %xmm4
-; SSE41-NEXT: cvttsd2si %xmm4, %rcx
-; SSE41-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000
-; SSE41-NEXT: xorq %rdx, %rcx
-; SSE41-NEXT: cvttsd2si %xmm3, %rax
-; SSE41-NEXT: ucomisd %xmm2, %xmm3
-; SSE41-NEXT: cmovaeq %rcx, %rax
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: subsd %xmm2, %xmm3
-; SSE41-NEXT: cvttsd2si %xmm3, %rsi
-; SSE41-NEXT: xorq %rdx, %rsi
-; SSE41-NEXT: cvttsd2si %xmm1, %rcx
-; SSE41-NEXT: ucomisd %xmm2, %xmm1
-; SSE41-NEXT: cmovaeq %rsi, %rcx
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: subsd %xmm2, %xmm3
-; SSE41-NEXT: cvttsd2si %xmm3, %rsi
-; SSE41-NEXT: xorq %rdx, %rsi
-; SSE41-NEXT: cvttsd2si %xmm1, %rdi
-; SSE41-NEXT: ucomisd %xmm2, %xmm1
-; SSE41-NEXT: cmovaeq %rsi, %rdi
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: subsd %xmm2, %xmm1
-; SSE41-NEXT: cvttsd2si %xmm1, %rsi
-; SSE41-NEXT: xorq %rdx, %rsi
-; SSE41-NEXT: cvttsd2si %xmm0, %rdx
-; SSE41-NEXT: ucomisd %xmm2, %xmm0
-; SSE41-NEXT: cmovaeq %rsi, %rdx
-; SSE41-NEXT: movq %rdx, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
-; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE41-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; SSE41-NEXT: subpd %xmm3, %xmm0
-; SSE41-NEXT: movq %rdi, %xmm1
-; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE41-NEXT: subpd %xmm3, %xmm1
-; SSE41-NEXT: haddpd %xmm1, %xmm0
-; SSE41-NEXT: movq %rcx, %xmm1
-; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE41-NEXT: subpd %xmm3, %xmm1
-; SSE41-NEXT: movq %rax, %xmm4
-; SSE41-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
-; SSE41-NEXT: subpd %xmm3, %xmm4
-; SSE41-NEXT: haddpd %xmm4, %xmm1
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_unsigned_v4f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vcvttsd2si %xmm3, %rcx
-; AVX1-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000
-; AVX1-NEXT: xorq %rdx, %rcx
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vucomisd %xmm1, %xmm2
-; AVX1-NEXT: cmovaeq %rcx, %rax
-; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
-; AVX1-NEXT: xorq %rdx, %rcx
-; AVX1-NEXT: vcvttsd2si %xmm0, %rsi
-; AVX1-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-NEXT: cmovaeq %rcx, %rsi
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vcvttsd2si %xmm3, %rcx
-; AVX1-NEXT: xorq %rdx, %rcx
-; AVX1-NEXT: vcvttsd2si %xmm2, %rdi
-; AVX1-NEXT: vucomisd %xmm1, %xmm2
-; AVX1-NEXT: cmovaeq %rcx, %rdi
-; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
-; AVX1-NEXT: xorq %rdx, %rcx
-; AVX1-NEXT: vcvttsd2si %xmm0, %rdx
-; AVX1-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-NEXT: cmovaeq %rcx, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX1-NEXT: vmovapd {{.*#+}} xmm2 = [4.503600e+15,1.934281e+25]
-; AVX1-NEXT: vsubpd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vmovq %rdi, %xmm3
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; AVX1-NEXT: vsubpd %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vhaddpd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vmovq %rsi, %xmm3
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; AVX1-NEXT: vsubpd %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vmovq %rax, %xmm4
-; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; AVX1-NEXT: vsubpd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vhaddpd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; AVX1-NEXT: retq
%i = fptoui <4 x double> %x to <4 x i64>
%r = uitofp <4 x i64> %i to <4 x double>
@@ -443,15 +240,12 @@ define float @trunc_signed_f32(float %x)
;
; SSE41-LABEL: trunc_signed_f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: cvttss2si %xmm0, %eax
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: cvtsi2ssl %eax, %xmm0
+; SSE41-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_signed_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcvttss2si %xmm0, %eax
-; AVX1-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
+; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi float %x to i32
%r = sitofp i32 %i to float
@@ -468,15 +262,12 @@ define double @trunc_signed_f64(double %
;
; SSE41-LABEL: trunc_signed_f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: cvttsd2si %xmm0, %rax
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_signed_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm0
+; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi double %x to i64
%r = sitofp i64 %i to double
@@ -492,14 +283,12 @@ define <4 x float> @trunc_signed_v4f32(<
;
; SSE41-LABEL: trunc_signed_v4f32:
; SSE41: # %bb.0:
-; SSE41-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_signed_v4f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi <4 x float> %x to <4 x i32>
%r = sitofp <4 x i32> %i to <4 x float>
@@ -520,23 +309,12 @@ define <2 x double> @trunc_signed_v2f64(
;
; SSE41-LABEL: trunc_signed_v2f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: cvttsd2si %xmm0, %rax
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE41-NEXT: cvttsd2si %xmm0, %rcx
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE41-NEXT: cvtsi2sdq %rcx, %xmm1
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_signed_v2f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
-; AVX1-NEXT: vcvtsi2sdq %rcx, %xmm2, %xmm0
-; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; AVX1-NEXT: retq
%i = fptosi <2 x double> %x to <2 x i64>
%r = sitofp <2 x i64> %i to <2 x double>
@@ -565,39 +343,13 @@ define <4 x double> @trunc_signed_v4f64(
;
; SSE41-LABEL: trunc_signed_v4f64:
; SSE41: # %bb.0:
-; SSE41-NEXT: cvttsd2si %xmm1, %rax
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; SSE41-NEXT: cvttsd2si %xmm1, %rcx
-; SSE41-NEXT: cvttsd2si %xmm0, %rdx
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE41-NEXT: cvttsd2si %xmm0, %rsi
-; SSE41-NEXT: xorps %xmm0, %xmm0
-; SSE41-NEXT: cvtsi2sdq %rdx, %xmm0
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2sdq %rsi, %xmm1
-; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE41-NEXT: cvtsi2sdq %rcx, %xmm2
-; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
+; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc_signed_v4f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-NEXT: vcvttsd2si %xmm1, %rdx
-; AVX1-NEXT: vcvttsd2si %xmm0, %rsi
-; AVX1-NEXT: vcvtsi2sdq %rsi, %xmm2, %xmm0
-; AVX1-NEXT: vcvtsi2sdq %rdx, %xmm2, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT: vcvtsi2sdq %rcx, %xmm2, %xmm1
-; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; AVX1-NEXT: retq
%i = fptosi <4 x double> %x to <4 x i64>
%r = sitofp <4 x i64> %i to <4 x double>
More information about the llvm-commits
mailing list