[llvm] e5edd64 - [X86] Use a shorter sequence to implement FLT_ROUNDS
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 08:56:54 PST 2020
Author: Craig Topper
Date: 2020-01-29T08:56:33-08:00
New Revision: e5edd641fde0d3fae96593e02ef9453dffa5754a
URL: https://github.com/llvm/llvm-project/commit/e5edd641fde0d3fae96593e02ef9453dffa5754a
DIFF: https://github.com/llvm/llvm-project/commit/e5edd641fde0d3fae96593e02ef9453dffa5754a.diff
LOG: [X86] Use a shorter sequence to implement FLT_ROUNDS
This code needs to map from the FPCW 2-bit encoding for rounding mode to the 2-bit encoding defined for FLT_ROUNDS. The previous implementation did some clever swapping of bits and adding 1 modulo 4 to do the mapping.
This patch instead uses an 8-bit immediate as a lookup table of four 2-bit values. Then we use the 2-bit FPCW encoding to index the lookup table by using a right shift and an AND. This requires extracting the 2-bit value from FPCW and multiplying it by 2 to make it usable as a shift amount, but it still results in less code.
Differential Revision: https://reviews.llvm.org/D73599
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/flt-rounds.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ce71634e17f7..f8eaa8751bfe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25427,8 +25427,11 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2 Round to +inf
3 Round to -inf
- To perform the conversion, we do:
- (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
+ To perform the conversion, we use a packed lookup table of the four 2-bit
+ values that we can index by FPSR[11:10]
+ 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
+
+ (0x2d >> ((FPSR & 0xc00) >> 9)) & 3
*/
MachineFunction &MF = DAG.getMachineFunction();
@@ -25456,24 +25459,19 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue CWD =
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
- // Transform as necessary
- SDValue CWD1 =
- DAG.getNode(ISD::SRL, DL, MVT::i16,
- DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0x800, DL, MVT::i16)),
- DAG.getConstant(11, DL, MVT::i8));
- SDValue CWD2 =
+ // Mask and turn the control bits into a shift for the lookup table.
+ SDValue Shift =
DAG.getNode(ISD::SRL, DL, MVT::i16,
DAG.getNode(ISD::AND, DL, MVT::i16,
- CWD, DAG.getConstant(0x400, DL, MVT::i16)),
+ CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
DAG.getConstant(9, DL, MVT::i8));
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);
+ SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
SDValue RetVal =
- DAG.getNode(ISD::AND, DL, MVT::i16,
- DAG.getNode(ISD::ADD, DL, MVT::i16,
- DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
- DAG.getConstant(1, DL, MVT::i16)),
- DAG.getConstant(3, DL, MVT::i16));
+ DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
+ DAG.getConstant(3, DL, MVT::i32));
return DAG.getZExtOrTrunc(RetVal, DL, VT);
}
diff --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll
index 531f0dafc2dc..b5e77c4e5866 100644
--- a/llvm/test/CodeGen/X86/flt-rounds.ll
+++ b/llvm/test/CodeGen/X86/flt-rounds.ll
@@ -10,13 +10,12 @@ define i32 @test_flt_rounds() nounwind {
; X86: # %bb.0:
; X86-NEXT: subl $12, %esp
; X86-NEXT: fnstcw (%esp)
-; X86-NEXT: movl (%esp), %eax
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movzwl (%esp), %ecx
; X86-NEXT: shrl $9, %ecx
-; X86-NEXT: andl $2, %ecx
-; X86-NEXT: shrl $11, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: leal 1(%eax,%ecx), %eax
+; X86-NEXT: andb $6, %cl
+; X86-NEXT: movl $45, %eax
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shrl %cl, %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: addl $12, %esp
; X86-NEXT: retl
@@ -24,13 +23,12 @@ define i32 @test_flt_rounds() nounwind {
; X64-LABEL: test_flt_rounds:
; X64: # %bb.0:
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: shrl $9, %ecx
-; X64-NEXT: andl $2, %ecx
-; X64-NEXT: shrl $11, %eax
-; X64-NEXT: andl $1, %eax
-; X64-NEXT: leal 1(%rax,%rcx), %eax
+; X64-NEXT: andb $6, %cl
+; X64-NEXT: movl $45, %eax
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shrl %cl, %eax
; X64-NEXT: andl $3, %eax
; X64-NEXT: retq
%1 = call i32 @llvm.flt.rounds()
More information about the llvm-commits
mailing list