[llvm] r350564 - [X86][AutoUpgrade] Make some tweaks to reduce the number of nested if/else in the intrinsic upgrade code to avoid an MSVC compiler limit.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 7 12:13:46 PST 2019
Author: ctopper
Date: Mon Jan 7 12:13:45 2019
New Revision: 350564
URL: http://llvm.org/viewvc/llvm-project?rev=350564&view=rev
Log:
[X86][AutoUpgrade] Make some tweaks to reduce the number of nested if/else in the intrinsic upgrade code to avoid an MSVC compiler limit.
MSVC has a nesting limit of around 110-130 levels. Each branch of an if/else if/else if chain counts as another nesting level against this limit. The autoupgrade code consists of a long chain of these, checking for matches against strings.
This commit moves a large if/else chain that was inside one of the blocks out into a separate helper function. There are more of these we could move, or we could change some to lookup tables.
I've also merged together a few similar blocks in the outer chain. This should buy us some margin for a little bit.
Modified:
llvm/trunk/lib/IR/AutoUpgrade.cpp
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=350564&r1=350563&r2=350564&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Mon Jan 7 12:13:45 2019
@@ -933,6 +933,67 @@ static Value *UpgradeX86ALIGNIntrinsics(
return EmitX86Select(Builder, Mask, Align, Passthru);
}
+static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
+ bool ZeroMask, bool IndexForm) {
+ Type *Ty = CI.getType();
+ unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ bool IsFloat = Ty->isFPOrFPVectorTy();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
+ else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_128;
+ else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
+ else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_128;
+ else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
+ else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_256;
+ else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
+ else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_256;
+ else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
+ else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_d_512;
+ else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
+ else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
+ IID = Intrinsic::x86_avx512_vpermi2var_q_512;
+ else if (VecWidth == 128 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
+ else if (VecWidth == 256 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
+ else if (VecWidth == 512 && EltWidth == 16)
+ IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
+ else if (VecWidth == 128 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
+ else if (VecWidth == 256 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
+ else if (VecWidth == 512 && EltWidth == 8)
+ IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
+ CI.getArgOperand(2) };
+
+ // If this isn't index form we need to swap operand 0 and 1.
+ if (!IndexForm)
+ std::swap(Args[0], Args[1]);
+
+ Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
+ Args);
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
+ : Builder.CreateBitCast(CI.getArgOperand(1),
+ Ty);
+ return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
+}
+
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
bool IsSigned, bool IsAddition) {
Type *Ty = CI.getType();
@@ -2406,24 +2467,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pand.")) {
- Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
- Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
- CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
- Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
- Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
+ Name.startswith("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2431,7 +2476,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
+ Name.startswith("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
@@ -2440,7 +2486,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
+ Name.startswith("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2448,7 +2495,8 @@ void llvm::UpgradeIntrinsicCall(CallInst
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
+ Name.startswith("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
@@ -2532,26 +2580,16 @@ void llvm::UpgradeIntrinsicCall(CallInst
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.max.p") &&
+ } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
+ Name.startswith("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
- Intrinsic::ID IID;
- if (Name[17] == 's')
- IID = Intrinsic::x86_avx512_max_ps_512;
- else
- IID = Intrinsic::x86_avx512_max_pd_512;
-
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
- { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(4) });
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
- CI->getArgOperand(2));
- } else if (IsX86 && Name.startswith("avx512.mask.min.p") &&
- Name.drop_front(18) == ".512") {
- Intrinsic::ID IID;
- if (Name[17] == 's')
- IID = Intrinsic::x86_avx512_min_ps_512;
- else
- IID = Intrinsic::x86_avx512_min_pd_512;
+ bool IsDouble = Name[17] == 'd';
+ bool IsMin = Name[13] == 'i';
+ static const Intrinsic::ID MinMaxTbl[2][2] = {
+ { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
+ { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
+ };
+ Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
@@ -3095,62 +3133,7 @@ void llvm::UpgradeIntrinsicCall(CallInst
Name.startswith("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
- unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
- unsigned EltWidth = CI->getType()->getScalarSizeInBits();
- bool IsFloat = CI->getType()->isFPOrFPVectorTy();
- Intrinsic::ID IID;
- if (VecWidth == 128 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
- else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_128;
- else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
- else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_128;
- else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
- else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_256;
- else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
- else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_256;
- else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
- else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_d_512;
- else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
- else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
- IID = Intrinsic::x86_avx512_vpermi2var_q_512;
- else if (VecWidth == 128 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
- else if (VecWidth == 256 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
- else if (VecWidth == 512 && EltWidth == 16)
- IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
- else if (VecWidth == 128 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
- else if (VecWidth == 256 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
- else if (VecWidth == 512 && EltWidth == 8)
- IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
- else
- llvm_unreachable("Unexpected intrinsic");
-
- Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
- CI->getArgOperand(2) };
-
- // If this isn't index form we need to swap operand 0 and 1.
- if (!IndexForm)
- std::swap(Args[0], Args[1]);
-
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
- Args);
- Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
- : Builder.CreateBitCast(CI->getArgOperand(1),
- CI->getType());
- Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
+ Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
Name.startswith("avx512.maskz.vpdpbusd.") ||
Name.startswith("avx512.mask.vpdpbusds.") ||
More information about the llvm-commits
mailing list