[llvm] [X86] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86 conversion nodes when upper elements are not demanded. (PR #102882)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 02:57:53 PDT 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/102882
>From 441954ad925da5b676c4e9593fccc6d758091ae2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 12 Aug 2024 12:08:12 +0100
Subject: [PATCH 1/2] [X86] SimplifyDemandedVectorEltsForTargetNode - reduce
width of X86 conversion nodes when upper elements are not demanded.
Fixes #83402
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++++++++
.../CodeGen/X86/vector-half-conversions.ll | 19 ++++++++-----------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index db50f132b1349f..24cbc8f93391e5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42524,6 +42524,25 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
+ }
+ // Conversions.
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI:
+ case X86ISD::CVTPH2PS: {
+ SDLoc DL(Op);
+ unsigned Scale = SizeInBits / ExtSizeInBits;
+ SDValue SrcOp = Op.getOperand(0);
+ MVT SrcVT = SrcOp.getSimpleValueType();
+ unsigned SrcExtSize =
+ std::max<unsigned>(SrcVT.getSizeInBits() / Scale, 128);
+ MVT ExtVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(),
+ ExtSizeInBits / VT.getScalarSizeInBits());
+ SDValue ExtOp = TLO.DAG.getNode(
+ Opc, DL, ExtVT, extractSubVector(SrcOp, 0, TLO.DAG, DL, SrcExtSize));
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
}
// Zero upper elements.
case X86ISD::VZEXT_MOVL:
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index e87814ebb1dbe4..ef0f3f3e816dfa 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -4990,6 +4990,7 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
ret <4 x i32> %ext
}
+; PR83402
define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
; AVX-LABEL: fptosi_4f16_to_4i32:
; AVX: # %bb.0:
@@ -5024,16 +5025,14 @@ define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
;
; F16C-LABEL: fptosi_4f16_to_4i32:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
-; F16C-NEXT: vzeroupper
; F16C-NEXT: retq
;
; AVX512-LABEL: fptosi_4f16_to_4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%cvt = fptosi <4 x half> %a to <4 x i32>
ret <4 x i32> %cvt
@@ -5213,13 +5212,12 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
;
; F16C-LABEL: fptoui_4f16_to_4i32:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
-; F16C-NEXT: vcvttps2dq %ymm0, %ymm1
-; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; F16C-NEXT: vcvttps2dq %ymm0, %ymm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
+; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
; F16C-NEXT: vorps %xmm0, %xmm1, %xmm0
; F16C-NEXT: vblendvps %xmm1, %xmm0, %xmm1, %xmm0
-; F16C-NEXT: vzeroupper
; F16C-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f16_to_4i32:
@@ -5232,9 +5230,8 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
;
; AVX512-FASTLANE-LABEL: fptoui_4f16_to_4i32:
; AVX512-FASTLANE: # %bb.0:
-; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
-; AVX512-FASTLANE-NEXT: vzeroupper
; AVX512-FASTLANE-NEXT: retq
%cvt = fptoui <4 x half> %a to <4 x i32>
ret <4 x i32> %cvt
>From f94dbe13604708bb094263cdb0fc0af5e39bd830 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 12 Aug 2024 15:51:00 +0100
Subject: [PATCH 2/2] Add TODO comment to add additional conversion opcodes
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 24cbc8f93391e5..c6e1764edffc3a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42526,6 +42526,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return TLO.CombineTo(Op, Insert);
}
// Conversions.
+ // TODO: Add more CVT opcodes when we have test coverage.
case X86ISD::CVTTP2SI:
case X86ISD::CVTTP2UI:
case X86ISD::CVTPH2PS: {
More information about the llvm-commits
mailing list