[llvm] r259448 - [X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR.

Mon Feb 1 17:32:51 PST 2016

Author: ab
Date: Mon Feb  1 19:32:50 2016
New Revision: 259448

URL: http://llvm.org/viewvc/llvm-project?rev=259448&view=rev
Log:
[X86] Don't force Nearest-Even rounding for VCVTPS2PH, use MXCSR.

Officially, we don't acknowledge non-default configurations of MXCSR,
as getting there would require usage of the FENV_ACCESS pragma (at
least insofar as rounding mode is concerned).

We don't support the pragma, so we can assume that the default
rounding mode - round to nearest, ties to even - is always used.

However, hard-coding the rounding mode in the instruction's immediate is
inconsistent with the rest of the instruction set, where MXCSR is always
effective (unless otherwise specified). It is also an unnecessary obstacle
to the few brave souls who use fenv.h with LLVM.

Avoid the hard-coded rounding mode for fp_to_f16; use MXCSR instead.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/fastmath-float-half-conversion.ll
    llvm/trunk/test/CodeGen/X86/half.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=259448&r1=259447&r2=259448&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Feb  1 19:32:50 2016
@@ -8296,9 +8296,14 @@ let Predicates = [HasF16C] in {
 
 // Patterns for  matching conversions from float to half-float and vice versa.
 let Predicates = [HasF16C] in {
+  // Use MXCSR.RC for rounding instead of explicitly specifying the default
+  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
+  // configurations we support (the default). However, falling back to MXCSR is
+  // more consistent with other instructions, which are always controlled by it.
+  // Deferring to MXCSR.RC is encoded as an immediate of 0b100 (i.e. 4).
   def : Pat<(fp_to_f16 FR32:$src),
             (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
-              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+              (COPY_TO_REGCLASS FR32:$src, VR128), 4)), sub_16bit))>;
 
   def : Pat<(f16_to_fp GR16:$src),
             (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
@@ -8306,7 +8311,7 @@ let Predicates = [HasF16C] in {
 
   def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
             (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
-              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 4)), FR32)) >;
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/X86/fastmath-float-half-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fastmath-float-half-conversion.ll?rev=259448&r1=259447&r2=259448&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fastmath-float-half-conversion.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fastmath-float-half-conversion.ll Mon Feb  1 19:32:50 2016
@@ -5,7 +5,7 @@ define zeroext i16 @test1_fast(double %d
 ; ALL-LABEL: test1_fast:
 ; F16C-NOT: callq {{_+}}truncdfhf2
 ; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX: callq {{_+}}truncdfhf2
 ; ALL: ret
 entry:
@@ -19,7 +19,7 @@ define zeroext i16 @test2_fast(x86_fp80
 ; F16C: fldt
 ; F16C-NEXT: fstps
 ; F16C-NEXT: vmovss
-; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX: callq {{_+}}truncxfhf2
 ; ALL: ret
 entry:

Modified: llvm/trunk/test/CodeGen/X86/half.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/half.ll?rev=259448&r1=259447&r2=259448&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/half.ll (original)
+++ llvm/trunk/test/CodeGen/X86/half.ll Mon Feb  1 19:32:50 2016
@@ -102,7 +102,7 @@ define void @test_sitofp_i64(i64 %a, hal
 ; CHECK_LIBCALL-NEXT: retq
 
 ; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]]
 ; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
 ; CHECK-F16C-NEXT: movw %ax, (%rsi)
 ; CHECK-F16C-NEXT: retq
@@ -175,7 +175,7 @@ define void @test_uitofp_i64(i64 %a, hal
 ; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
 ; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
 ; CHECK-LIBCALL-NEXT: popq [[ADDR]]
-; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]]
 ; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
 ; CHECK-F16C-NEXT: movw %ax, (%rsi)
 ; CHECK-NEXT: retq