[llvm] [X86] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86 conversion nodes when upper elements are not demanded. (PR #102882)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 07:51:27 PDT 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/102882
>From b20332ca46d4b7a41c61f3a6cb5fa8d7b10ccabe Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 12 Aug 2024 12:08:12 +0100
Subject: [PATCH 1/2] [X86] SimplifyDemandedVectorEltsForTargetNode - reduce
width of X86 conversion nodes when upper elements are not demanded.
Fixes #83402
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++++++++
.../CodeGen/X86/vector-half-conversions.ll | 19 ++++++++-----------
2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2074fac857891..958fba076e4e1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42518,6 +42518,25 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
+ }
+ // Conversions.
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI:
+ case X86ISD::CVTPH2PS: {
+ SDLoc DL(Op);
+ unsigned Scale = SizeInBits / ExtSizeInBits;
+ SDValue SrcOp = Op.getOperand(0);
+ MVT SrcVT = SrcOp.getSimpleValueType();
+ unsigned SrcExtSize =
+ std::max<unsigned>(SrcVT.getSizeInBits() / Scale, 128);
+ MVT ExtVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(),
+ ExtSizeInBits / VT.getScalarSizeInBits());
+ SDValue ExtOp = TLO.DAG.getNode(
+ Opc, DL, ExtVT, extractSubVector(SrcOp, 0, TLO.DAG, DL, SrcExtSize));
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
}
// Zero upper elements.
case X86ISD::VZEXT_MOVL:
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index e87814ebb1dbe..ef0f3f3e816df 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -4990,6 +4990,7 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
ret <4 x i32> %ext
}
+; PR83402
define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
; AVX-LABEL: fptosi_4f16_to_4i32:
; AVX: # %bb.0:
@@ -5024,16 +5025,14 @@ define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
;
; F16C-LABEL: fptosi_4f16_to_4i32:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
-; F16C-NEXT: vzeroupper
; F16C-NEXT: retq
;
; AVX512-LABEL: fptosi_4f16_to_4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%cvt = fptosi <4 x half> %a to <4 x i32>
ret <4 x i32> %cvt
@@ -5213,13 +5212,12 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
;
; F16C-LABEL: fptoui_4f16_to_4i32:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
-; F16C-NEXT: vcvttps2dq %ymm0, %ymm1
-; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; F16C-NEXT: vcvttps2dq %ymm0, %ymm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
+; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
; F16C-NEXT: vorps %xmm0, %xmm1, %xmm0
; F16C-NEXT: vblendvps %xmm1, %xmm0, %xmm1, %xmm0
-; F16C-NEXT: vzeroupper
; F16C-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f16_to_4i32:
@@ -5232,9 +5230,8 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
;
; AVX512-FASTLANE-LABEL: fptoui_4f16_to_4i32:
; AVX512-FASTLANE: # %bb.0:
-; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
-; AVX512-FASTLANE-NEXT: vzeroupper
; AVX512-FASTLANE-NEXT: retq
%cvt = fptoui <4 x half> %a to <4 x i32>
ret <4 x i32> %cvt
>From ee47d9d9a732edae94d41404e4350c17904bb045 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 12 Aug 2024 15:51:00 +0100
Subject: [PATCH 2/2] Add TODO comment to add additional conversion opcodes
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 958fba076e4e1..45eee45e43e06 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42520,6 +42520,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return TLO.CombineTo(Op, Insert);
}
// Conversions.
+ // TODO: Add more CVT opcodes when we have test coverage.
case X86ISD::CVTTP2SI:
case X86ISD::CVTTP2UI:
case X86ISD::CVTPH2PS: {
More information about the llvm-commits
mailing list