[PATCH] D114844: [X86][FP16] Only generate approximate rsqrt when Reciprocal is true for half type
Phoebe Wang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 1 21:53:16 PST 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf13b43d5702b: [X86][FP16] Only generate approximate rsqrt when Reciprocal is true for half type (authored by pengfei).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D114844/new/
https://reviews.llvm.org/D114844
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
Index: llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
+++ llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
@@ -972,10 +972,7 @@
define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
; CHECK-LABEL: test_sqrt_ph_128_fast2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm1
-; CHECK-NEXT: vcmpgeph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm1, %k1
-; CHECK-NEXT: vrsqrtph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vsqrtph %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
ret <8 x half> %1
Index: llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -123,12 +123,7 @@
define half @test_sqrt_sh3(half %a0, half %a1) {
; CHECK-LABEL: test_sqrt_sh3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm1
-; CHECK-NEXT: vcmpltsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
-; CHECK-NEXT: vrsqrtsh %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; CHECK-NEXT: vsqrtsh %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = call fast half @llvm.sqrt.f16(half %a0)
ret half %1
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23190,6 +23190,10 @@
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ // We don't need to replace SQRT with RSQRT for half type.
+ if (VT.getScalarType() == MVT::f16)
+ return true;
+
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
@@ -23236,6 +23240,7 @@
if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
Subtarget.hasFP16()) {
+ assert(Reciprocal && "Don't replace SQRT with RSQRT for half type");
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 0;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D114844.391212.patch
Type: text/x-patch
Size: 2516 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211202/884c4055/attachment.bin>
More information about the llvm-commits mailing list