[llvm] a84790e - [X86] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86 conversion nodes when upper elements are not demanded. (#102882)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 04:23:43 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-16T12:23:39+01:00
New Revision: a84790e5fca0429683b24f7bb52d2c4d947dc011
URL: https://github.com/llvm/llvm-project/commit/a84790e5fca0429683b24f7bb52d2c4d947dc011
DIFF: https://github.com/llvm/llvm-project/commit/a84790e5fca0429683b24f7bb52d2c4d947dc011.diff
LOG: [X86] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86 conversion nodes when upper elements are not demanded. (#102882)
Fixes #83402
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-half-conversions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index db50f132b1349..c6e1764edffc3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42524,6 +42524,26 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
+ }
+ // Conversions.
+ // TODO: Add more CVT opcodes when we have test coverage.
+ case X86ISD::CVTTP2SI:
+ case X86ISD::CVTTP2UI:
+ case X86ISD::CVTPH2PS: {
+ SDLoc DL(Op);
+ unsigned Scale = SizeInBits / ExtSizeInBits;
+ SDValue SrcOp = Op.getOperand(0);
+ MVT SrcVT = SrcOp.getSimpleValueType();
+ unsigned SrcExtSize =
+ std::max<unsigned>(SrcVT.getSizeInBits() / Scale, 128);
+ MVT ExtVT = MVT::getVectorVT(VT.getSimpleVT().getScalarType(),
+ ExtSizeInBits / VT.getScalarSizeInBits());
+ SDValue ExtOp = TLO.DAG.getNode(
+ Opc, DL, ExtVT, extractSubVector(SrcOp, 0, TLO.DAG, DL, SrcExtSize));
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
}
// Zero upper elements.
case X86ISD::VZEXT_MOVL:
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index e87814ebb1dbe..ef0f3f3e816df 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -4990,6 +4990,7 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
ret <4 x i32> %ext
}
+; PR83402
define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
; AVX-LABEL: fptosi_4f16_to_4i32:
; AVX: # %bb.0:
@@ -5024,16 +5025,14 @@ define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
;
; F16C-LABEL: fptosi_4f16_to_4i32:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
-; F16C-NEXT: vzeroupper
; F16C-NEXT: retq
;
; AVX512-LABEL: fptosi_4f16_to_4i32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%cvt = fptosi <4 x half> %a to <4 x i32>
ret <4 x i32> %cvt
@@ -5213,13 +5212,12 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
;
; F16C-LABEL: fptoui_4f16_to_4i32:
; F16C: # %bb.0:
-; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
-; F16C-NEXT: vcvttps2dq %ymm0, %ymm1
-; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; F16C-NEXT: vcvttps2dq %ymm0, %ymm0
+; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
+; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
+; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
; F16C-NEXT: vorps %xmm0, %xmm1, %xmm0
; F16C-NEXT: vblendvps %xmm1, %xmm0, %xmm1, %xmm0
-; F16C-NEXT: vzeroupper
; F16C-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f16_to_4i32:
@@ -5232,9 +5230,8 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
;
; AVX512-FASTLANE-LABEL: fptoui_4f16_to_4i32:
; AVX512-FASTLANE: # %bb.0:
-; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
-; AVX512-FASTLANE-NEXT: vzeroupper
; AVX512-FASTLANE-NEXT: retq
%cvt = fptoui <4 x half> %a to <4 x i32>
ret <4 x i32> %cvt
More information about the llvm-commits mailing list