[llvm] Outline X86 autoupgrade patterns (PR #97851)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 5 11:05:48 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: Justin Holewinski (jholewinski)

<details>
<summary>Changes</summary>

Outlining these patterns significantly reduces the overall stack frame size of llvm::UpgradeIntrinsicCall, which is helpful for scenarios where compilation threads are stack-constrained. The reduction is comparatively small when using clang as the host compiler, but very pronounced when using MSVC 2022 with release builds.

Clang:   1,624 ->   824 bytes
MSVC:   23,560 -> 6,120 bytes

---

Patch is 167.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97851.diff


1 Files Affected:

- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1695-1757) 


``````````diff
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 5beefaa1ec701..de27b6848b20a 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -2245,6 +2245,1698 @@ void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
   }
 }
 
+static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
+                                      IRBuilder<> &Builder) {
+  LLVMContext &C = F->getContext();
+  Value *Rep = nullptr;
+
+  if (Name.starts_with("sse4a.movnt.")) {
+    SmallVector<Metadata *, 1> Elts;
+    Elts.push_back(
+        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+    MDNode *Node = MDNode::get(C, Elts);
+
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+
+    // Nontemporal (unaligned) store of the 0'th element of the float/double
+    // vector.
+    Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
+    PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
+    Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
+    Value *Extract =
+        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
+
+    StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
+    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
+  } else if ((Name.starts_with("avx.movnt.") ||
+              Name.starts_with("avx512.storent."))) {
+    SmallVector<Metadata *, 1> Elts;
+    Elts.push_back(
+        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+    MDNode *Node = MDNode::get(C, Elts);
+
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+
+    // Convert the type of the pointer to a pointer to the stored type.
+    Value *BC = Builder.CreateBitCast(
+        Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
+    StoreInst *SI = Builder.CreateAlignedStore(
+        Arg1, BC,
+        Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
+    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
+  } else if (Name == "sse2.storel.dq") {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+
+    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
+    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
+    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
+    Value *BC = Builder.CreateBitCast(
+        Arg0, PointerType::getUnqual(Elt->getType()), "cast");
+    Builder.CreateAlignedStore(Elt, BC, Align(1));
+  } else if ((Name.starts_with("sse.storeu.") ||
+              Name.starts_with("sse2.storeu.") ||
+              Name.starts_with("avx.storeu."))) {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+
+    Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
+                                 "cast");
+    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
+  } else if (Name == "avx512.mask.store.ss") {
+    Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
+    upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
+                       Mask, false);
+  } else if (Name.starts_with("avx512.mask.store")) {
+    // "avx512.mask.storeu." or "avx512.mask.store."
+    bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
+    upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
+                       CI->getArgOperand(2), Aligned);
+  } else if ((Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp"))) {
+    // Upgrade packed integer vector compare intrinsics to compare instructions.
+    // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
+    bool CmpEq = Name[9] == 'e';
+    Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
+                             CI->getArgOperand(0), CI->getArgOperand(1));
+    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+  } else if ((Name.starts_with("avx512.broadcastm"))) {
+    Type *ExtTy = Type::getInt32Ty(C);
+    if (CI->getOperand(0)->getType()->isIntegerTy(8))
+      ExtTy = Type::getInt64Ty(C);
+    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
+                       ExtTy->getPrimitiveSizeInBits();
+    Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
+    Rep = Builder.CreateVectorSplat(NumElts, Rep);
+  } else if ((Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd")) {
+    Value *Vec = CI->getArgOperand(0);
+    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
+    Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
+                                               Elt0->getType());
+    Elt0 = Builder.CreateCall(Intr, Elt0);
+    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
+  } else if ((Name.starts_with("avx.sqrt.p") ||
+              Name.starts_with("sse2.sqrt.p") ||
+              Name.starts_with("sse.sqrt.p"))) {
+    Rep =
+        Builder.CreateCall(Intrinsic::getDeclaration(
+                               F->getParent(), Intrinsic::sqrt, CI->getType()),
+                           {CI->getArgOperand(0)});
+  } else if ((Name.starts_with("avx512.mask.sqrt.p"))) {
+    if (CI->arg_size() == 4 &&
+        (!isa<ConstantInt>(CI->getArgOperand(3)) ||
+         cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
+      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
+                                          : Intrinsic::x86_avx512_sqrt_pd_512;
+
+      Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
+      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
+                               Args);
+    } else {
+      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
+                                                         Intrinsic::sqrt,
+                                                         CI->getType()),
+                               {CI->getArgOperand(0)});
+    }
+    Rep =
+        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
+  } else if ((Name.starts_with("avx512.ptestm") ||
+              Name.starts_with("avx512.ptestnm"))) {
+    Value *Op0 = CI->getArgOperand(0);
+    Value *Op1 = CI->getArgOperand(1);
+    Value *Mask = CI->getArgOperand(2);
+    Rep = Builder.CreateAnd(Op0, Op1);
+    llvm::Type *Ty = Op0->getType();
+    Value *Zero = llvm::Constant::getNullValue(Ty);
+    ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
+                                   ? ICmpInst::ICMP_NE
+                                   : ICmpInst::ICMP_EQ;
+    Rep = Builder.CreateICmp(Pred, Rep, Zero);
+    Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
+  } else if ((Name.starts_with("avx512.mask.pbroadcast"))) {
+    unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
+                           ->getNumElements();
+    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
+    Rep =
+        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
+  } else if ((Name.starts_with("avx512.kunpck"))) {
+    unsigned NumElts = CI->getType()->getScalarSizeInBits();
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
+    int Indices[64];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+
+    // First extract half of each vector. This gives better codegen than
+    // doing it in a single shuffle.
+    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
+    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
+    // Concat the vectors.
+    // NOTE: Operands have to be swapped to match intrinsic definition.
+    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if (Name == "avx512.kand.w") {
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+    Rep = Builder.CreateAnd(LHS, RHS);
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if (Name == "avx512.kandn.w") {
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+    LHS = Builder.CreateNot(LHS);
+    Rep = Builder.CreateAnd(LHS, RHS);
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if (Name == "avx512.kor.w") {
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+    Rep = Builder.CreateOr(LHS, RHS);
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if (Name == "avx512.kxor.w") {
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+    Rep = Builder.CreateXor(LHS, RHS);
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if (Name == "avx512.kxnor.w") {
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+    LHS = Builder.CreateNot(LHS);
+    Rep = Builder.CreateXor(LHS, RHS);
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if (Name == "avx512.knot.w") {
+    Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Rep = Builder.CreateNot(Rep);
+    Rep = Builder.CreateBitCast(Rep, CI->getType());
+  } else if ((Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
+    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
+    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
+    Rep = Builder.CreateOr(LHS, RHS);
+    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
+    Value *C;
+    if (Name[14] == 'c')
+      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
+    else
+      C = ConstantInt::getNullValue(Builder.getInt16Ty());
+    Rep = Builder.CreateICmpEQ(Rep, C);
+    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
+  } else if ((Name == "sse.add.ss" || Name == "sse2.add.sd" ||
+              Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
+              Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
+              Name == "sse.div.ss" || Name == "sse2.div.sd")) {
+    Type *I32Ty = Type::getInt32Ty(C);
+    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+                                               ConstantInt::get(I32Ty, 0));
+    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+                                               ConstantInt::get(I32Ty, 0));
+    Value *EltOp;
+    if (Name.contains(".add."))
+      EltOp = Builder.CreateFAdd(Elt0, Elt1);
+    else if (Name.contains(".sub."))
+      EltOp = Builder.CreateFSub(Elt0, Elt1);
+    else if (Name.contains(".mul."))
+      EltOp = Builder.CreateFMul(Elt0, Elt1);
+    else
+      EltOp = Builder.CreateFDiv(Elt0, Elt1);
+    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
+                                      ConstantInt::get(I32Ty, 0));
+  } else if (Name.starts_with("avx512.mask.pcmp")) {
+    // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
+    bool CmpEq = Name[16] == 'e';
+    Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
+  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
+    Type *OpTy = CI->getArgOperand(0)->getType();
+    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+    Intrinsic::ID IID;
+    switch (VecWidth) {
+    default:
+      llvm_unreachable("Unexpected intrinsic");
+    case 128:
+      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
+      break;
+    case 256:
+      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
+      break;
+    case 512:
+      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
+      break;
+    }
+
+    Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+                             {CI->getOperand(0), CI->getArgOperand(1)});
+    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
+  } else if (Name.starts_with("avx512.mask.fpclass.p")) {
+    Type *OpTy = CI->getArgOperand(0)->getType();
+    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+    unsigned EltWidth = OpTy->getScalarSizeInBits();
+    Intrinsic::ID IID;
+    if (VecWidth == 128 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_fpclass_ps_128;
+    else if (VecWidth == 256 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_fpclass_ps_256;
+    else if (VecWidth == 512 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_fpclass_ps_512;
+    else if (VecWidth == 128 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_fpclass_pd_128;
+    else if (VecWidth == 256 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_fpclass_pd_256;
+    else if (VecWidth == 512 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_fpclass_pd_512;
+    else
+      llvm_unreachable("Unexpected intrinsic");
+
+    Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+                             {CI->getOperand(0), CI->getArgOperand(1)});
+    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
+  } else if (Name.starts_with("avx512.cmp.p")) {
+    SmallVector<Value *, 4> Args(CI->args());
+    Type *OpTy = Args[0]->getType();
+    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
+    unsigned EltWidth = OpTy->getScalarSizeInBits();
+    Intrinsic::ID IID;
+    if (VecWidth == 128 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
+    else if (VecWidth == 256 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
+    else if (VecWidth == 512 && EltWidth == 32)
+      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
+    else if (VecWidth == 128 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
+    else if (VecWidth == 256 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
+    else if (VecWidth == 512 && EltWidth == 64)
+      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
+    else
+      llvm_unreachable("Unexpected intrinsic");
+
+    Value *Mask = Constant::getAllOnesValue(CI->getType());
+    if (VecWidth == 512)
+      std::swap(Mask, Args.back());
+    Args.push_back(Mask);
+
+    Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+                             Args);
+  } else if (Name.starts_with("avx512.mask.cmp.")) {
+    // Integer compare intrinsics.
+    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+    Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
+  } else if (Name.starts_with("avx512.mask.ucmp.")) {
+    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+    Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
+  } else if ((Name.starts_with("avx512.cvtb2mask.") ||
+              Name.starts_with("avx512.cvtw2mask.") ||
+              Name.starts_with("avx512.cvtd2mask.") ||
+              Name.starts_with("avx512.cvtq2mask."))) {
+    Value *Op = CI->getArgOperand(0);
+    Value *Zero = llvm::Constant::getNullValue(Op->getType());
+    Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
+    Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
+  } else if ((Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
+              Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
+              Name.starts_with("avx512.mask.pabs"))) {
+    Rep = upgradeAbs(Builder, *CI);
+  } else if ((Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
+              Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
+              Name.starts_with("avx512.mask.pmaxs"))) {
+    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
+  } else if ((Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
+              Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
+              Name.starts_with("avx512.mask.pmaxu"))) {
+    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
+  } else if ((Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
+              Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
+              Name.starts_with("avx512.mask.pmins"))) {
+    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
+  } else if ((Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
+              Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
+              Name.starts_with("avx512.mask.pminu"))) {
+    Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
+  } else if ((Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
+              Name == "avx512.pmulu.dq.512" ||
+              Name.starts_with("avx512.mask.pmulu.dq."))) {
+    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
+  } else if ((Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
+              Name == "avx512.pmul.dq.512" ||
+              Name.starts_with("avx512.mask.pmul.dq."))) {
+    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
+  } else if ((Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
+              Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd")) {
+    Rep =
+        Builder.CreateSIToFP(CI->getArgOperand(1),
+                             cast<VectorType>(CI->getType())->getElementType());
+    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+  } else if (Name == "avx512.cvtusi2sd") {
+    Rep =
+        Builder.CreateUIToFP(CI->getArgOperand(1),
+                             cast<VectorType>(CI->getType())->getElementType());
+    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+  } else if (Name == "sse2.cvtss2sd") {
+    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
+    Rep = Builder.CreateFPExt(
+        Rep, cast<VectorType>(CI->getType())->getElementType());
+    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
+  } else if ((Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
+              Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
+              Name.starts_with("avx512.mask.cvtdq2pd.") ||
+              Name.starts_with("avx512.mask.cvtudq2pd.") ||
+              Name.starts_with("avx512.mask.cvtdq2ps.") ||
+              Name.starts_with("avx512.mask.cvtudq2ps.") ||
+              Name.starts_with("avx512.mask.cvtqq2pd.") ||
+              Name.starts_with("avx512.mask.cvtuqq2pd.") ||
+              Name == "avx512.mask.cvtqq2ps.256" ||
+              Name == "avx512.mask.cvtqq2ps.512" ||
+              Name == "avx512.mask.cvtuqq2ps.256" ||
+              Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
+              Name == "avx.cvt.ps2.pd.256" ||
+              Name == "avx512.mask.cvtps2pd.128" ||
+              Name == "avx512.mask.cvtps2pd.256")) {
+    auto *DstTy = cast<FixedVectorType>(CI->getType());
+    Rep = CI->getArgOperand(0);
+    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
+
+    unsigned NumDstElts = DstTy->getNumElements();
+    if (NumDstElts < SrcTy->getNumElements()) {
+      assert(NumDstElts == 2 && "Unexpected vector size");
+      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
+    }
+
+    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
+    bool IsUnsigned = Name.contains("cvtu");
+    if (IsPS2PD)
+      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+    else if (CI->arg_size() == 4 &&
+             (!isa<ConstantInt>(CI->getArgOperand(3)) ||
+              cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
+      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
+                                     : Intrinsic::x86_avx512_sitofp_round;
+      Function *F =
+          Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
+      Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
+    } else {
+      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
+                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
+    }
+
+    if (CI->arg_size() >= 3)
+      Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
+                          CI->getArgOperand(1));
+  } else if ((Name.starts_with("avx512.mask.vcvtph2ps.") ||
+             ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/97851


More information about the llvm-commits mailing list