[llvm] bfd8f7e - [X86] SimplifyDemandedVectorEltsForTargetNode - reduce vector width of FRSQRT/FRCP ymm nodes.

Tue Sep 24 02:37:42 PDT 2024

Author: Simon Pilgrim
Date: 2024-09-24T10:37:19+01:00
New Revision: bfd8f7ee4a85ae8873db14fa6e7e31223a1df169

URL: https://github.com/llvm/llvm-project/commit/bfd8f7ee4a85ae8873db14fa6e7e31223a1df169
DIFF: https://github.com/llvm/llvm-project/commit/bfd8f7ee4a85ae8873db14fa6e7e31223a1df169.diff

LOG: [X86] SimplifyDemandedVectorEltsForTargetNode - reduce vector width of FRSQRT/FRCP ymm nodes.

If we only demand the lower subvector of a FRSQRT/FRCP node, then reduce the width of the instruction.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/extractelement-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de8cfe31a5529f..d7a26dc4caec6c 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43109,6 +43109,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     case X86ISD::FMIN:
     case X86ISD::FMAXC:
     case X86ISD::FMINC:
+    case X86ISD::FRSQRT:
+    case X86ISD::FRCP:
       // Horizontal Ops.
     case X86ISD::HADD:
     case X86ISD::HSUB:

diff  --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 38162f676e7ee3..944f6bbfd0bfbe 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -1310,15 +1310,14 @@ define float @rcp_v4f32(<4 x float> %x) nounwind {
 define float @rcp_v8f32(<8 x float> %x) nounwind {
 ; X64-LABEL: rcp_v8f32:
 ; X64:       # %bb.0:
-; X64-NEXT:    vrcpps %ymm0, %ymm0
-; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X64-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: rcp_v8f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vrcpps %ymm0, %ymm0
+; X86-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
 ; X86-NEXT:    flds (%esp)
 ; X86-NEXT:    popl %eax
@@ -1351,15 +1350,14 @@ define float @rsqrt_v4f32(<4 x float> %x) nounwind {
 define float @rsqrt_v8f32(<8 x float> %x) nounwind {
 ; X64-LABEL: rsqrt_v8f32:
 ; X64:       # %bb.0:
-; X64-NEXT:    vrsqrtps %ymm0, %ymm0
-; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; X64-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: rsqrt_v8f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vrsqrtps %ymm0, %ymm0
+; X86-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
 ; X86-NEXT:    flds (%esp)
 ; X86-NEXT:    popl %eax