[llvm] Outline X86 autoupgrade patterns (PR #97851)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 5 11:05:48 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Justin Holewinski (jholewinski)
<details>
<summary>Changes</summary>
Outlining these patterns has a significant impact on the overall stack frame size of llvm::UpgradeIntrinsicCall. This is helpful for scenarios where compilation threads are stack-constrained. The overall impact is low when using clang as the host compiler, but very pronounced when using MSVC 2022 with release builds.
Clang: 1,624 -> 824 bytes
MSVC: 23,560 -> 6,120 bytes
---
Patch is 167.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97851.diff
1 File Affected:
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1695-1757)
``````````diff
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 5beefaa1ec701..de27b6848b20a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -2245,6 +2245,1698 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
}
}
+static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
+ IRBuilder<> &Builder) {
+ LLVMContext &C = F->getContext();
+ Value *Rep = nullptr;
+
+ if (Name.starts_with("sse4a.movnt.")) {
+ SmallVector<Metadata *, 1> Elts;
+ Elts.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Nontemporal (unaligned) store of the 0'th element of the float/double
+ // vector.
+ Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
+ PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
+ Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
+ Value *Extract =
+ Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
+
+ StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
+ SI->setMetadata(LLVMContext::MD_nontemporal, Node);
+ } else if ((Name.starts_with("avx.movnt.") ||
+ Name.starts_with("avx512.storent."))) {
+ SmallVector<Metadata *, 1> Elts;
+ Elts.push_back(
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+ MDNode *Node = MDNode::get(C, Elts);
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Value *BC = Builder.CreateBitCast(
+ Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
+ StoreInst *SI = Builder.CreateAlignedStore(
+ Arg1, BC,
+ Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
+ SI->setMetadata(LLVMContext::MD_nontemporal, Node);
+ } else if (Name == "sse2.storel.dq") {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
+ Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
+ Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
+ Value *BC = Builder.CreateBitCast(
+ Arg0, PointerType::getUnqual(Elt->getType()), "cast");
+ Builder.CreateAlignedStore(Elt, BC, Align(1));
+ } else if ((Name.starts_with("sse.storeu.") ||
+ Name.starts_with("sse2.storeu.") ||
+ Name.starts_with("avx.storeu."))) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+
+ Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
+ "cast");
+ Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
+ } else if (Name == "avx512.mask.store.ss") {
+ Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
+ upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
+ Mask, false);
+ } else if (Name.starts_with("avx512.mask.store")) {
+ // "avx512.mask.storeu." or "avx512.mask.store."
+ bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
+ upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), Aligned);
+ } else if ((Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp"))) {
+ // Upgrade packed integer vector compare intrinsics to compare instructions.
+ // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
+ bool CmpEq = Name[9] == 'e';
+ Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
+ CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if ((Name.starts_with("avx512.broadcastm"))) {
+ Type *ExtTy = Type::getInt32Ty(C);
+ if (CI->getOperand(0)->getType()->isIntegerTy(8))
+ ExtTy = Type::getInt64Ty(C);
+ unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
+ ExtTy->getPrimitiveSizeInBits();
+ Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
+ Rep = Builder.CreateVectorSplat(NumElts, Rep);
+ } else if ((Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd")) {
+ Value *Vec = CI->getArgOperand(0);
+ Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
+ Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
+ Elt0->getType());
+ Elt0 = Builder.CreateCall(Intr, Elt0);
+ Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
+ } else if ((Name.starts_with("avx.sqrt.p") ||
+ Name.starts_with("sse2.sqrt.p") ||
+ Name.starts_with("sse.sqrt.p"))) {
+ Rep =
+ Builder.CreateCall(Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::sqrt, CI->getType()),
+ {CI->getArgOperand(0)});
+ } else if ((Name.starts_with("avx512.mask.sqrt.p"))) {
+ if (CI->arg_size() == 4 &&
+ (!isa<ConstantInt>(CI->getArgOperand(3)) ||
+ cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
+ Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
+ : Intrinsic::x86_avx512_sqrt_pd_512;
+
+ Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+ Args);
+ } else {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::sqrt,
+ CI->getType()),
+ {CI->getArgOperand(0)});
+ }
+ Rep =
+ emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
+ } else if ((Name.starts_with("avx512.ptestm") ||
+ Name.starts_with("avx512.ptestnm"))) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Rep = Builder.CreateAnd(Op0, Op1);
+ llvm::Type *Ty = Op0->getType();
+ Value *Zero = llvm::Constant::getNullValue(Ty);
+ ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
+ ? ICmpInst::ICMP_NE
+ : ICmpInst::ICMP_EQ;
+ Rep = Builder.CreateICmp(Pred, Rep, Zero);
+ Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
+ } else if ((Name.starts_with("avx512.mask.pbroadcast"))) {
+ unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
+ ->getNumElements();
+ Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
+ Rep =
+ emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
+ } else if ((Name.starts_with("avx512.kunpck"))) {
+ unsigned NumElts = CI->getType()->getScalarSizeInBits();
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
+ int Indices[64];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+
+ // First extract half of each vector. This gives better codegen than
+ // doing it in a single shuffle.
+ LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
+ RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
+ // Concat the vectors.
+ // NOTE: Operands have to be swapped to match intrinsic definition.
+ Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if (Name == "avx512.kand.w") {
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+ Rep = Builder.CreateAnd(LHS, RHS);
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if (Name == "avx512.kandn.w") {
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+ LHS = Builder.CreateNot(LHS);
+ Rep = Builder.CreateAnd(LHS, RHS);
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if (Name == "avx512.kor.w") {
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+ Rep = Builder.CreateOr(LHS, RHS);
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if (Name == "avx512.kxor.w") {
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+ Rep = Builder.CreateXor(LHS, RHS);
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if (Name == "avx512.kxnor.w") {
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+ LHS = Builder.CreateNot(LHS);
+ Rep = Builder.CreateXor(LHS, RHS);
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if (Name == "avx512.knot.w") {
+ Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Rep = Builder.CreateNot(Rep);
+ Rep = Builder.CreateBitCast(Rep, CI->getType());
+ } else if ((Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
+ Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+ Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+ Rep = Builder.CreateOr(LHS, RHS);
+ Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
+ Value *C;
+ if (Name[14] == 'c')
+ C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
+ else
+ C = ConstantInt::getNullValue(Builder.getInt16Ty());
+ Rep = Builder.CreateICmpEQ(Rep, C);
+ Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
+ } else if ((Name == "sse.add.ss" || Name == "sse2.add.sd" ||
+ Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
+ Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
+ Name == "sse.div.ss" || Name == "sse2.div.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Value *EltOp;
+ if (Name.contains(".add."))
+ EltOp = Builder.CreateFAdd(Elt0, Elt1);
+ else if (Name.contains(".sub."))
+ EltOp = Builder.CreateFSub(Elt0, Elt1);
+ else if (Name.contains(".mul."))
+ EltOp = Builder.CreateFMul(Elt0, Elt1);
+ else
+ EltOp = Builder.CreateFDiv(Elt0, Elt1);
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
+ ConstantInt::get(I32Ty, 0));
+ } else if (Name.starts_with("avx512.mask.pcmp")) {
+ // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
+ bool CmpEq = Name[16] == 'e';
+ Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+ } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
+ Type *OpTy = CI->getArgOperand(0)->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ Intrinsic::ID IID;
+ switch (VecWidth) {
+ default:
+ llvm_unreachable("Unexpected intrinsic");
+ case 128:
+ IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
+ break;
+ case 256:
+ IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
+ break;
+ case 512:
+ IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
+ break;
+ }
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ {CI->getOperand(0), CI->getArgOperand(1)});
+ Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
+ } else if (Name.starts_with("avx512.mask.fpclass.p")) {
+ Type *OpTy = CI->getArgOperand(0)->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ unsigned EltWidth = OpTy->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_fpclass_ps_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_fpclass_ps_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_fpclass_ps_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_fpclass_pd_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_fpclass_pd_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_fpclass_pd_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ {CI->getOperand(0), CI->getArgOperand(1)});
+ Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
+ } else if (Name.starts_with("avx512.cmp.p")) {
+ SmallVector<Value *, 4> Args(CI->args());
+ Type *OpTy = Args[0]->getType();
+ unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+ unsigned EltWidth = OpTy->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Mask = Constant::getAllOnesValue(CI->getType());
+ if (VecWidth == 512)
+ std::swap(Mask, Args.back());
+ Args.push_back(Mask);
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ Args);
+ } else if (Name.starts_with("avx512.mask.cmp.")) {
+ // Integer compare intrinsics.
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
+ } else if (Name.starts_with("avx512.mask.ucmp.")) {
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
+ } else if ((Name.starts_with("avx512.cvtb2mask.") ||
+ Name.starts_with("avx512.cvtw2mask.") ||
+ Name.starts_with("avx512.cvtd2mask.") ||
+ Name.starts_with("avx512.cvtq2mask."))) {
+ Value *Op = CI->getArgOperand(0);
+ Value *Zero = llvm::Constant::getNullValue(Op->getType());
+ Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
+ Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
+ } else if ((Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
+ Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
+ Name.starts_with("avx512.mask.pabs"))) {
+ Rep = upgradeAbs(Builder, *CI);
+ } else if ((Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
+ Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
+ Name.starts_with("avx512.mask.pmaxs"))) {
+ Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
+ } else if ((Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
+ Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
+ Name.starts_with("avx512.mask.pmaxu"))) {
+ Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
+ } else if ((Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
+ Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
+ Name.starts_with("avx512.mask.pmins"))) {
+ Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
+ } else if ((Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
+ Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
+ Name.starts_with("avx512.mask.pminu"))) {
+ Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
+ } else if ((Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
+ Name == "avx512.pmulu.dq.512" ||
+ Name.starts_with("avx512.mask.pmulu.dq."))) {
+ Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
+ } else if ((Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
+ Name == "avx512.pmul.dq.512" ||
+ Name.starts_with("avx512.mask.pmul.dq."))) {
+ Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
+ } else if ((Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
+ Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd")) {
+ Rep =
+ Builder.CreateSIToFP(CI->getArgOperand(1),
+ cast<VectorType>(CI->getType())->getElementType());
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+ } else if (Name == "avx512.cvtusi2sd") {
+ Rep =
+ Builder.CreateUIToFP(CI->getArgOperand(1),
+ cast<VectorType>(CI->getType())->getElementType());
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+ } else if (Name == "sse2.cvtss2sd") {
+ Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
+ Rep = Builder.CreateFPExt(
+ Rep, cast<VectorType>(CI->getType())->getElementType());
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+ } else if ((Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
+ Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
+ Name.starts_with("avx512.mask.cvtdq2pd.") ||
+ Name.starts_with("avx512.mask.cvtudq2pd.") ||
+ Name.starts_with("avx512.mask.cvtdq2ps.") ||
+ Name.starts_with("avx512.mask.cvtudq2ps.") ||
+ Name.starts_with("avx512.mask.cvtqq2pd.") ||
+ Name.starts_with("avx512.mask.cvtuqq2pd.") ||
+ Name == "avx512.mask.cvtqq2ps.256" ||
+ Name == "avx512.mask.cvtqq2ps.512" ||
+ Name == "avx512.mask.cvtuqq2ps.256" ||
+ Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
+ Name == "avx.cvt.ps2.pd.256" ||
+ Name == "avx512.mask.cvtps2pd.128" ||
+ Name == "avx512.mask.cvtps2pd.256")) {
+ auto *DstTy = cast<FixedVectorType>(CI->getType());
+ Rep = CI->getArgOperand(0);
+ auto *SrcTy = cast<FixedVectorType>(Rep->getType());
+
+ unsigned NumDstElts = DstTy->getNumElements();
+ if (NumDstElts < SrcTy->getNumElements()) {
+ assert(NumDstElts == 2 && "Unexpected vector size");
+ Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
+ }
+
+ bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
+ bool IsUnsigned = Name.contains("cvtu");
+ if (IsPS2PD)
+ Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+ else if (CI->arg_size() == 4 &&
+ (!isa<ConstantInt>(CI->getArgOperand(3)) ||
+ cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
+ Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
+ : Intrinsic::x86_avx512_sitofp_round;
+ Function *F =
+ Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
+ Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
+ } else {
+ Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
+ : Builder.CreateSIToFP(Rep, DstTy, "cvt");
+ }
+
+ if (CI->arg_size() >= 3)
+ Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if ((Name.starts_with("avx512.mask.vcvtph2ps.") ||
+ ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/97851
More information about the llvm-commits
mailing list