[llvm] [X86] Lower mathlib call ldexp into scalef when avx512 is enabled (PR #166839)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 6 12:33:12 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Kavin Gnanapandithan (KavinTheG)
<details>
<summary>Changes</summary>
Resolve issue #<!-- -->165694.
---
Patch is 49.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166839.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+73)
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+163-108)
- (modified) llvm/test/CodeGen/X86/ldexp-avx512.ll (+129-384)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 133406bd8e0d7..f9e9bb26638d4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2590,6 +2590,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
}
+ if (Subtarget.hasAVX512()) {
+ for (MVT VT : { MVT::f32, MVT::f64, MVT::v16f32, MVT::v8f64})
+ setOperationAction(ISD::FLDEXP, VT, Custom);
+
+ if (Subtarget.hasVLX()) {
+ for (MVT VT : { MVT::v4f32, MVT::v2f64, MVT::v8f32, MVT::v4f64 })
+ setOperationAction(ISD::FLDEXP, VT, Custom);
+
+ if (Subtarget.hasFP16()) {
+ for (MVT VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16 })
+ setOperationAction(ISD::FLDEXP, VT, Custom);
+ }
+ }
+
+ if (Subtarget.hasFP16()) {
+ for (MVT VT : { MVT::f16, MVT::v32f16 })
+ setOperationAction(ISD::FLDEXP, VT, Custom);
+ }
+ }
+
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
// is. We should promote the value to 64-bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
@@ -19142,6 +19162,58 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return SDValue();
}
+static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ SDValue X = Op.getOperand(0);
+ MVT XTy = X.getSimpleValueType();
+ SDValue Exp = Op.getOperand(1);
+ MVT XVT, ExpVT;
+
+ switch (XTy.SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::f16:
+ if (Subtarget.hasFP16()) {
+ XVT = Subtarget.hasVLX() ? MVT::v8f16 : MVT::v32f16;
+ ExpVT = XVT;
+ break;
+ }
+ X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
+ [[fallthrough]];
+ case MVT::f32:
+ XVT = MVT::v4f32;
+ ExpVT = MVT::v4f32;
+ break;
+ case MVT::f64:
+ XVT = MVT::v2f64;
+ ExpVT = MVT::v2f64;
+ break;
+ case MVT::v4f32:
+ case MVT::v2f64:
+ case MVT::v8f32:
+ case MVT::v4f64:
+ case MVT::v16f32:
+ case MVT::v8f64:
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+ return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+ }
+
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+ Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
+ SDValue VX =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, XVT, DAG.getUNDEF(XVT), X, Zero);
+ SDValue VExp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpVT,
+ DAG.getUNDEF(ExpVT), Exp, Zero);
+ SDValue Scalef = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
+ SDValue Final =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, X.getValueType(), Scalef, Zero);
+ if (X.getValueType() != XTy)
+ Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
+ DAG.getIntPtrConstant(1, SDLoc(Op)));
+ return Final;
+}
+
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -33672,6 +33744,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
+ case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
// clang-format on
}
}
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index 81529aff39ff1..499695f408396 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -79,38 +79,64 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
; CHECK-SSE-NEXT: retq
;
-; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
-; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: subq $40, %rsp
-; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
-; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
-; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX-NEXT: callq ldexpf at PLT
-; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-AVX-NEXT: vmovd %xmm0, %edi
-; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX-NEXT: callq ldexpf at PLT
-; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
-; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
-; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX-NEXT: callq ldexpf at PLT
-; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
-; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX-NEXT: callq ldexpf at PLT
-; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; CHECK-AVX-NEXT: addq $40, %rsp
-; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
-; CHECK-AVX-NEXT: retq
+; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: subq $40, %rsp
+; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
+; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
+; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-AVX2-NEXT: callq ldexpf at PLT
+; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
+; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-AVX2-NEXT: callq ldexpf at PLT
+; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
+; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-AVX2-NEXT: callq ldexpf at PLT
+; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
+; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-AVX2-NEXT: callq ldexpf at PLT
+; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-AVX2-NEXT: addq $40, %rsp
+; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm1
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm2 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm1, %xmm2, %xmm1
+; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[1,1,1,1]
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm3, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm2, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-ONLY-AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm2, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
+; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: retq
%r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
ret <4 x float> %r
}
@@ -560,82 +586,109 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
-; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
-; CHECK-AVX512F: # %bb.0:
-; CHECK-AVX512F-NEXT: subq $72, %rsp
-; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
-; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
-; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
-; CHECK-AVX512F-NEXT: movswl %ax, %edi
-; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-AVX512F-NEXT: callq ldexpf at PLT
-; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
-; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-AVX512F-NEXT: addq $72, %rsp
-; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
-; CHECK-AVX512F-NEXT: retq
+; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
+; CHECK-ONLY-AVX512F: # %bb.0:
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2
+; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm2, %xmm1, %xmm2
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; CHECK-ONLY-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; CHECK-ONLY-AVX512F-NEXT: vmovd %xmm0, %eax
+; CHECK-ONLY-AVX512F-NEXT: cwtl
+; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm1, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; CHECK-ONLY-AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-ONLY-AVX512F-NEXT: retq
+;
+; CHECK-SKX-LABEL: fmul_pow2_ldexp_8xhalf:
+; CHECK-SKX: # %bb.0:
+; CHECK-SKX-NEXT: vpextrw $7, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
+; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm2 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-SKX-NEXT: vscalefss %xmm1, %xmm2, %xmm1
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; CHECK-SKX-NEXT: vpextrw $6, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
+; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; CHECK-SKX-NEXT: vpextrw $5, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
+; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; CHECK-SKX-NEXT: vpextrw $4, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
+; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; CHECK-SKX-NEXT: vpextrw $3, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
+; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
+; CHECK-SKX-NEXT: vpextrw $2, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
+; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; CHECK-SKX-NEXT: vpextrw $1, %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
+; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
+; CHECK-SKX-NEXT: vmovd %xmm0, %eax
+; CHECK-SKX-NEXT: cwtl
+; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
+; CHECK-SKX-NEXT: vscalefss %xmm0, %xmm2, %xmm0
+; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; CHECK-SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SKX-NEXT: retq
%r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
ret <8 x half> %r
}
@@ -1769,3 +1822,5 @@ define x86_fp80 @pr128528(i1 %cond) {
%mul = fmul x86_fp80 %conv, 0xK4007D055555555555800
ret x86_fp80 %mul
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/166839
More information about the llvm-commits
mailing list