[llvm] r371357 - [X86] Use xorps to create fp128 +0.0 constants.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 8 18:35:00 PDT 2019
Author: ctopper
Date: Sun Sep 8 18:35:00 2019
New Revision: 371357
URL: http://llvm.org/viewvc/llvm-project?rev=371357&view=rev
Log:
[X86] Use xorps to create fp128 +0.0 constants.
This matches what we already do for f32/f64. GCC also does this for fp128.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/fp128-cast.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=371357&r1=371356&r2=371357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 8 18:35:00 2019
@@ -628,6 +628,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
}
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=371357&r1=371356&r2=371357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Sep 8 18:35:00 2019
@@ -464,7 +464,9 @@ let isReMaterializable = 1, isAsCheapAsA
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
- [(set FR64X:$dst, fpimm0)]>;
+ [(set FR64X:$dst, fp64imm0)]>;
+ def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
+ [(set VR128X:$dst, fp128imm0)]>;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=371357&r1=371356&r2=371357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sun Sep 8 18:35:00 2019
@@ -963,6 +963,10 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
+def fp128imm0 : PatLeaf<(f128 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
// to VEXTRACTF128/VEXTRACTI128 imm.
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=371357&r1=371356&r2=371357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sun Sep 8 18:35:00 2019
@@ -497,9 +497,11 @@ bool X86InstrInfo::isReallyTriviallyReMa
case X86::AVX512_512_SETALLONES:
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0F128:
case X86::AVX_SET0:
case X86::FsFLD0SD:
case X86::FsFLD0SS:
+ case X86::FsFLD0F128:
case X86::KSET0D:
case X86::KSET0Q:
case X86::KSET0W:
@@ -4026,6 +4028,7 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
case X86::V_SET0:
case X86::FsFLD0SS:
case X86::FsFLD0SD:
+ case X86::FsFLD0F128:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
@@ -4039,7 +4042,8 @@ bool X86InstrInfo::expandPostRAPseudo(Ma
}
case X86::AVX512_128_SET0:
case X86::AVX512_FsFLD0SS:
- case X86::AVX512_FsFLD0SD: {
+ case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0F128: {
bool HasVLX = Subtarget.hasVLX();
Register SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -5152,6 +5156,8 @@ MachineInstr *X86InstrInfo::foldMemoryOp
case X86::V_SET0:
case X86::V_SETALLONES:
case X86::AVX512_128_SET0:
+ case X86::FsFLD0F128:
+ case X86::AVX512_FsFLD0F128:
Alignment = 16;
break;
case X86::MMX_SET0:
@@ -5201,7 +5207,9 @@ MachineInstr *X86InstrInfo::foldMemoryOp
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
case X86::FsFLD0SS:
- case X86::AVX512_FsFLD0SS: {
+ case X86::AVX512_FsFLD0SS:
+ case X86::FsFLD0F128:
+ case X86::AVX512_FsFLD0F128: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -5231,6 +5239,8 @@ MachineInstr *X86InstrInfo::foldMemoryOp
Ty = Type::getFloatTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction().getContext());
+ else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
+ Ty = Type::getFP128Ty(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=371357&r1=371356&r2=371357&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Sep 8 18:35:00 2019
@@ -115,7 +115,9 @@ let isReMaterializable = 1, isAsCheapAsA
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
[(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
+ [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
+ def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/fp128-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp128-cast.ll?rev=371357&r1=371356&r2=371357&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp128-cast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp128-cast.ll Sun Sep 8 18:35:00 2019
@@ -680,6 +680,57 @@ entry:
ret i32 %conv
}
+
+define i32 @TestConst128Zero(fp128 %v) nounwind {
+; X64-SSE-LABEL: TestConst128Zero:
+; X64-SSE: # %bb.0: # %entry
+; X64-SSE-NEXT: pushq %rax
+; X64-SSE-NEXT: xorps %xmm1, %xmm1
+; X64-SSE-NEXT: callq __gttf2
+; X64-SSE-NEXT: xorl %ecx, %ecx
+; X64-SSE-NEXT: testl %eax, %eax
+; X64-SSE-NEXT: setg %cl
+; X64-SSE-NEXT: movl %ecx, %eax
+; X64-SSE-NEXT: popq %rcx
+; X64-SSE-NEXT: retq
+;
+; X32-LABEL: TestConst128Zero:
+; X32: # %bb.0: # %entry
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll __gttf2
+; X32-NEXT: addl $32, %esp
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setg %cl
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+;
+; X64-AVX-LABEL: TestConst128Zero:
+; X64-AVX: # %bb.0: # %entry
+; X64-AVX-NEXT: pushq %rax
+; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: callq __gttf2
+; X64-AVX-NEXT: xorl %ecx, %ecx
+; X64-AVX-NEXT: testl %eax, %eax
+; X64-AVX-NEXT: setg %cl
+; X64-AVX-NEXT: movl %ecx, %eax
+; X64-AVX-NEXT: popq %rcx
+; X64-AVX-NEXT: retq
+entry:
+ %cmp = fcmp ogt fp128 %v, 0xL00000000000000000000000000000000
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
; C code:
; struct TestBits_ieee_ext {
; unsigned v1;
@@ -833,7 +884,7 @@ define fp128 @TestTruncCopysign(fp128 %x
; X64-SSE-LABEL: TestTruncCopysign:
; X64-SSE: # %bb.0: # %entry
; X64-SSE-NEXT: cmpl $50001, %edi # imm = 0xC351
-; X64-SSE-NEXT: jl .LBB17_2
+; X64-SSE-NEXT: jl .LBB18_2
; X64-SSE-NEXT: # %bb.1: # %if.then
; X64-SSE-NEXT: pushq %rax
; X64-SSE-NEXT: callq __trunctfdf2
@@ -842,7 +893,7 @@ define fp128 @TestTruncCopysign(fp128 %x
; X64-SSE-NEXT: orps %xmm1, %xmm0
; X64-SSE-NEXT: callq __extenddftf2
; X64-SSE-NEXT: addq $8, %rsp
-; X64-SSE-NEXT: .LBB17_2: # %cleanup
+; X64-SSE-NEXT: .LBB18_2: # %cleanup
; X64-SSE-NEXT: retq
;
; X32-LABEL: TestTruncCopysign:
@@ -856,7 +907,7 @@ define fp128 @TestTruncCopysign(fp128 %x
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: cmpl $50001, {{[0-9]+}}(%esp) # imm = 0xC351
-; X32-NEXT: jl .LBB17_4
+; X32-NEXT: jl .LBB18_4
; X32-NEXT: # %bb.1: # %if.then
; X32-NEXT: pushl %eax
; X32-NEXT: pushl %ecx
@@ -868,11 +919,11 @@ define fp128 @TestTruncCopysign(fp128 %x
; X32-NEXT: testb $-128, {{[0-9]+}}(%esp)
; X32-NEXT: flds {{\.LCPI.*}}
; X32-NEXT: flds {{\.LCPI.*}}
-; X32-NEXT: jne .LBB17_3
+; X32-NEXT: jne .LBB18_3
; X32-NEXT: # %bb.2: # %if.then
; X32-NEXT: fstp %st(1)
; X32-NEXT: fldz
-; X32-NEXT: .LBB17_3: # %if.then
+; X32-NEXT: .LBB18_3: # %if.then
; X32-NEXT: fstp %st(0)
; X32-NEXT: subl $16, %esp
; X32-NEXT: leal {{[0-9]+}}(%esp), %eax
@@ -884,7 +935,7 @@ define fp128 @TestTruncCopysign(fp128 %x
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: .LBB17_4: # %cleanup
+; X32-NEXT: .LBB18_4: # %cleanup
; X32-NEXT: movl %edx, (%esi)
; X32-NEXT: movl %edi, 4(%esi)
; X32-NEXT: movl %ecx, 8(%esi)
@@ -898,7 +949,7 @@ define fp128 @TestTruncCopysign(fp128 %x
; X64-AVX-LABEL: TestTruncCopysign:
; X64-AVX: # %bb.0: # %entry
; X64-AVX-NEXT: cmpl $50001, %edi # imm = 0xC351
-; X64-AVX-NEXT: jl .LBB17_2
+; X64-AVX-NEXT: jl .LBB18_2
; X64-AVX-NEXT: # %bb.1: # %if.then
; X64-AVX-NEXT: pushq %rax
; X64-AVX-NEXT: callq __trunctfdf2
@@ -908,7 +959,7 @@ define fp128 @TestTruncCopysign(fp128 %x
; X64-AVX-NEXT: vorps %xmm0, %xmm1, %xmm0
; X64-AVX-NEXT: callq __extenddftf2
; X64-AVX-NEXT: addq $8, %rsp
-; X64-AVX-NEXT: .LBB17_2: # %cleanup
+; X64-AVX-NEXT: .LBB18_2: # %cleanup
; X64-AVX-NEXT: retq
entry:
%cmp = icmp sgt i32 %n, 50000
@@ -928,7 +979,7 @@ cleanup:
define i1 @PR34866(i128 %x) nounwind {
; X64-SSE-LABEL: PR34866:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: xorps %xmm0, %xmm0
; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: xorq -{{[0-9]+}}(%rsp), %rsi
; X64-SSE-NEXT: xorq -{{[0-9]+}}(%rsp), %rdi
@@ -948,7 +999,7 @@ define i1 @PR34866(i128 %x) nounwind {
;
; X64-AVX-LABEL: PR34866:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: xorq -{{[0-9]+}}(%rsp), %rsi
; X64-AVX-NEXT: xorq -{{[0-9]+}}(%rsp), %rdi
@@ -963,7 +1014,7 @@ define i1 @PR34866(i128 %x) nounwind {
define i1 @PR34866_commute(i128 %x) nounwind {
; X64-SSE-LABEL: PR34866_commute:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: xorps %xmm0, %xmm0
; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: xorq -{{[0-9]+}}(%rsp), %rsi
; X64-SSE-NEXT: xorq -{{[0-9]+}}(%rsp), %rdi
@@ -983,7 +1034,7 @@ define i1 @PR34866_commute(i128 %x) noun
;
; X64-AVX-LABEL: PR34866_commute:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: xorq -{{[0-9]+}}(%rsp), %rsi
; X64-AVX-NEXT: xorq -{{[0-9]+}}(%rsp), %rdi
More information about the llvm-commits
mailing list