[llvm] 397bcfe - [X86] Fold extract_subvector(int_to_fp(x)) vXi32/vXf32 cases to match existing fp_to_int folds
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 16 06:57:51 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-16T14:57:37+01:00
New Revision: 397bcfef741315a68e75d174b8f746a11b42c0e2
URL: https://github.com/llvm/llvm-project/commit/397bcfef741315a68e75d174b8f746a11b42c0e2
DIFF: https://github.com/llvm/llvm-project/commit/397bcfef741315a68e75d174b8f746a11b42c0e2.diff
LOG: [X86] Fold extract_subvector(int_to_fp(x)) vXi32/vXf32 cases to match existing fp_to_int folds
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr50609.ll
llvm/test/CodeGen/X86/vec_int_to_fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6e1764edffc3a..6385dab9c55e65 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57203,11 +57203,13 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
}
}
// v4i32 CVTPS2DQ(v4f32) / CVTPS2UDQ(v4f32).
- if ((InOpcode == ISD::FP_TO_SINT ||
- (InOpcode == ISD::FP_TO_UINT && Subtarget.hasVLX())) &&
- VT == MVT::v4i32) {
+ // v4f32 CVTDQ2PS(v4i32) / CVTUDQ2PS(v4i32).
+ if ((InOpcode == ISD::FP_TO_SINT || InOpcode == ISD::SINT_TO_FP ||
+ ((InOpcode == ISD::FP_TO_UINT || InOpcode == ISD::UINT_TO_FP) &&
+ Subtarget.hasVLX())) &&
+ (VT == MVT::v4i32 || VT == MVT::v4f32)) {
SDValue Src = InVec.getOperand(0);
- if (Src.getValueType().getScalarType() == MVT::f32)
+ if (Src.getValueType().getScalarSizeInBits() == 32)
return DAG.getNode(InOpcode, DL, VT,
extractSubVector(Src, IdxVal, DAG, DL, SizeInBits));
}
diff --git a/llvm/test/CodeGen/X86/pr50609.ll b/llvm/test/CodeGen/X86/pr50609.ll
index ea85c312f38bd1..3e39c567d79e6d 100644
--- a/llvm/test/CodeGen/X86/pr50609.ll
+++ b/llvm/test/CodeGen/X86/pr50609.ll
@@ -12,7 +12,7 @@ define void @PR50609(ptr noalias nocapture %RET, ptr noalias %aFOO, <16 x i32> %
; CHECK-NEXT: vpsrld $30, %xmm3, %xmm3
; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vpsrad $2, %xmm2, %xmm2
-; CHECK-NEXT: vcvtdq2ps %ymm2, %ymm2
+; CHECK-NEXT: vcvtdq2ps %xmm2, %xmm2
; CHECK-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
; CHECK-NEXT: vmaskmovps %ymm2, %ymm0, (%rdi)
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index e26de4be7066fa..af841cf38b24ae 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1478,32 +1478,11 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: sitofp_8i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: sitofp_8i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: sitofp_8i16_to_4f32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: sitofp_8i16_to_4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
%cvt = sitofp <8 x i16> %a to <8 x float>
%shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %shuf
@@ -1549,32 +1528,11 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: sitofp_16i8_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: sitofp_16i8_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
-; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: sitofp_16i8_to_4f32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: sitofp_16i8_to_4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
%cvt = sitofp <16 x i8> %a to <16 x float>
%shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %shuf
@@ -2354,32 +2312,11 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: uitofp_8i16_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: uitofp_8i16_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: uitofp_8i16_to_4f32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_8i16_to_4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
%cvt = uitofp <8 x i16> %a to <8 x float>
%shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %shuf
@@ -2425,32 +2362,11 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: retq
;
-; AVX1-LABEL: uitofp_16i8_to_4f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: uitofp_16i8_to_4f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: uitofp_16i8_to_4f32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_16i8_to_4f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
%cvt = uitofp <16 x i8> %a to <16 x float>
%shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %shuf
More information about the llvm-commits
mailing list