[clang] 5def954 - Support of expression granularity for _Float16.
Zahira Ammarguellat via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 25 05:27:05 PDT 2022
Author: Zahira Ammarguellat
Date: 2022-08-25T08:26:53-04:00
New Revision: 5def954a5b1774747bbe90a4f6879e51f341a74d
URL: https://github.com/llvm/llvm-project/commit/5def954a5b1774747bbe90a4f6879e51f341a74d
DIFF: https://github.com/llvm/llvm-project/commit/5def954a5b1774747bbe90a4f6879e51f341a74d.diff
LOG: Support of expression granularity for _Float16.
Differential Revision: https://reviews.llvm.org/D113107
Added:
Modified:
clang/docs/LanguageExtensions.rst
clang/include/clang/Basic/TargetInfo.h
clang/lib/Basic/Targets/X86.cpp
clang/lib/Basic/Targets/X86.h
clang/lib/CodeGen/CGExprComplex.cpp
clang/lib/CodeGen/CGExprScalar.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/test/CodeGen/X86/Float16-arithmetic.c
clang/test/CodeGen/X86/Float16-complex.c
Removed:
################################################################################
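For context, a minimal sketch of the user-visible behavior this commit implements (illustrative only, not part of the patch; it mirrors the add2 test updated below and assumes an x86-64 target without AVX512-FP16):

    /* Hypothetical example: with excess-precision emulation, the whole
       expression is evaluated in float and rounded to _Float16 once,
       instead of truncating back to half after each individual add.  */
    _Float16 sum3(_Float16 a, _Float16 b, _Float16 c) {
      return a + b + c;  /* fpext each operand, two float fadds, one fptrunc */
    }
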
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 0e8028d2cc066..6522e0492f8bc 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -751,7 +751,11 @@ On X86 targets, ``_Float16`` is supported as long as SSE2 is available, which
includes all 64-bit and all recent 32-bit processors. When the target supports
AVX512-FP16, ``_Float16`` arithmetic is performed using that native support.
Otherwise, ``_Float16`` arithmetic is performed by promoting to ``float``,
-performing the operation, and then truncating to ``_Float16``.
+performing the operation, and then truncating to ``_Float16``. When doing this
+emulation, Clang defaults to following the C standard's rules for excess
+precision arithmetic, which avoids intermediate truncations within statements
+and may generate different results from a strict operation-by-operation
+emulation.
``_Float16`` will be supported on more targets as they define ABIs for it.
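A concrete illustration of the "may generate different results" wording above (hypothetical example, not part of the patch): promoting to float lets half-ulp contributions survive across a statement where per-operation rounding would discard them.

    #include <stdio.h>

    int main(void) {
      _Float16 one = (_Float16)1.0;
      _Float16 h   = (_Float16)0x1p-11;  /* 0.5 ulp of 1.0 in _Float16 */
      /* Per-operation emulation: 1.0 + 2^-11 rounds back to 1.0
         (ties-to-even), so the full sum stays 1.0.  Excess precision:
         the sum is 1 + 2^-10 exactly in float, which truncates to the
         representable _Float16 value 1.0009765625.  */
      printf("%a\n", (double)(one + h + h));
      return 0;
    }
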
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 6f9ee65544450..3d1c14888529c 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -912,6 +912,8 @@ class TargetInfo : public virtual TransferrableTargetInfo,
return true;
}
+ virtual bool shouldEmitFloat16WithExcessPrecision() const { return false; }
+
/// Specify if mangling based on address space map should be used or
/// not for language specific address spaces
bool useAddressSpaceMapMangling() const {
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 7a3cb662a91ff..46a551603eca2 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -239,6 +239,7 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512ER = true;
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
+ HasLegalHalfType = true;
} else if (Feature == "+avx512pf") {
HasAVX512PF = true;
} else if (Feature == "+avx512dq") {
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index ed0864aec6d2d..aef9f4a0676ec 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -290,6 +290,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
return false;
}
+ bool shouldEmitFloat16WithExcessPrecision() const {
+ return HasFloat16 && !hasLegalHalfType();
+ }
+
void getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const override;
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index 5409e82d437e2..cd03475373716 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -206,12 +206,13 @@ class ComplexExprEmitter
return VisitPrePostIncDec(E, true, true);
}
ComplexPairTy VisitUnaryDeref(const Expr *E) { return EmitLoadOfLValue(E); }
- ComplexPairTy VisitUnaryPlus (const UnaryOperator *E) {
- TestAndClearIgnoreReal();
- TestAndClearIgnoreImag();
- return Visit(E->getSubExpr());
- }
- ComplexPairTy VisitUnaryMinus (const UnaryOperator *E);
+
+ ComplexPairTy VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ ComplexPairTy VisitPlus(const UnaryOperator *E, QualType PromotionType);
+ ComplexPairTy VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ ComplexPairTy VisitMinus(const UnaryOperator *E, QualType PromotionType);
ComplexPairTy VisitUnaryNot (const UnaryOperator *E);
// LNot,Real,Imag never return complex.
ComplexPairTy VisitUnaryExtension(const UnaryOperator *E) {
@@ -253,7 +254,10 @@ class ComplexExprEmitter
QualType Ty; // Computation Type.
};
- BinOpInfo EmitBinOps(const BinaryOperator *E);
+ BinOpInfo EmitBinOps(const BinaryOperator *E,
+ QualType PromotionTy = QualType());
+ ComplexPairTy EmitPromoted(const Expr *E, QualType PromotionTy);
+ ComplexPairTy EmitPromotedComplexOperand(const Expr *E, QualType PromotionTy);
LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
ComplexPairTy (ComplexExprEmitter::*Func)
(const BinOpInfo &),
@@ -270,19 +274,38 @@ class ComplexExprEmitter
ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName,
const BinOpInfo &Op);
- ComplexPairTy VisitBinAdd(const BinaryOperator *E) {
- return EmitBinAdd(EmitBinOps(E));
- }
- ComplexPairTy VisitBinSub(const BinaryOperator *E) {
- return EmitBinSub(EmitBinOps(E));
- }
- ComplexPairTy VisitBinMul(const BinaryOperator *E) {
- return EmitBinMul(EmitBinOps(E));
+ QualType getPromotionType(QualType Ty) {
+ if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
+ if (Ty->isRealFloatingType()) {
+ if (Ty->isFloat16Type())
+ return CGF.getContext().FloatTy;
+ } else {
+ assert(Ty->isAnyComplexType() &&
+ "Expecting to promote a complex type!");
+ QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
+ if (ElementType->isFloat16Type())
+ return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+ }
+ }
+ return QualType();
}
- ComplexPairTy VisitBinDiv(const BinaryOperator *E) {
- return EmitBinDiv(EmitBinOps(E));
+
+#define HANDLEBINOP(OP) \
+ ComplexPairTy VisitBin##OP(const BinaryOperator *E) { \
+ QualType promotionTy = getPromotionType(E->getType()); \
+ ComplexPairTy result = EmitBin##OP(EmitBinOps(E, promotionTy)); \
+ if (!promotionTy.isNull()) \
+ result = \
+ CGF.EmitUnPromotedValue(result, E->getType()); \
+ return result; \
}
+ HANDLEBINOP(Mul)
+ HANDLEBINOP(Div)
+ HANDLEBINOP(Add)
+ HANDLEBINOP(Sub)
+#undef HANDLEBINOP
+
ComplexPairTy VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) {
return Visit(E->getSemanticForm());
}
@@ -556,10 +579,45 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op,
llvm_unreachable("unknown cast resulting in complex value");
}
-ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
+ComplexPairTy ComplexExprEmitter::VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ ComplexPairTy result = VisitPlus(E, promotionTy);
+ if (!promotionTy.isNull())
+ return CGF.EmitUnPromotedValue(result, E->getSubExpr()->getType());
+ return result;
+}
+
+ComplexPairTy ComplexExprEmitter::VisitPlus(const UnaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreReal();
TestAndClearIgnoreImag();
- ComplexPairTy Op = Visit(E->getSubExpr());
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedComplexExpr(E->getSubExpr(), PromotionType);
+ return Visit(E->getSubExpr());
+}
+
+ComplexPairTy ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ ComplexPairTy result = VisitMinus(E, promotionTy);
+ if (!promotionTy.isNull())
+ return CGF.EmitUnPromotedValue(result, E->getSubExpr()->getType());
+ return result;
+}
+ComplexPairTy ComplexExprEmitter::VisitMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ TestAndClearIgnoreReal();
+ TestAndClearIgnoreImag();
+ ComplexPairTy Op;
+ if (!PromotionType.isNull())
+ Op = CGF.EmitPromotedComplexExpr(E->getSubExpr(), PromotionType);
+ else
+ Op = Visit(E->getSubExpr());
llvm::Value *ResR, *ResI;
if (Op.first->getType()->isFloatingPointTy()) {
@@ -876,21 +934,102 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) {
return ComplexPairTy(DSTr, DSTi);
}
+ComplexPairTy CodeGenFunction::EmitUnPromotedValue(ComplexPairTy result,
+ QualType UnPromotionType) {
+ llvm::Type *ComplexElementTy =
+ ConvertType(UnPromotionType->castAs<ComplexType>()->getElementType());
+ if (result.first)
+ result.first =
+ Builder.CreateFPTrunc(result.first, ComplexElementTy, "unpromotion");
+ if (result.second)
+ result.second =
+ Builder.CreateFPTrunc(result.second, ComplexElementTy, "unpromotion");
+ return result;
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedValue(ComplexPairTy result,
+ QualType PromotionType) {
+ llvm::Type *ComplexElementTy =
+ ConvertType(PromotionType->castAs<ComplexType>()->getElementType());
+ if (result.first)
+ result.first = Builder.CreateFPExt(result.first, ComplexElementTy, "ext");
+ if (result.second)
+ result.second = Builder.CreateFPExt(result.second, ComplexElementTy, "ext");
+
+ return result;
+}
+
+ComplexPairTy ComplexExprEmitter::EmitPromoted(const Expr *E,
+ QualType PromotionType) {
+ E = E->IgnoreParens();
+ if (auto BO = dyn_cast<BinaryOperator>(E)) {
+ switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+ case BO_##OP: \
+ return EmitBin##OP(EmitBinOps(BO, PromotionType));
+ HANDLE_BINOP(Add)
+ HANDLE_BINOP(Sub)
+ HANDLE_BINOP(Mul)
+ HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+ default:
+ break;
+ }
+ } else if (auto UO = dyn_cast<UnaryOperator>(E)) {
+ switch (UO->getOpcode()) {
+ case UO_Minus:
+ return VisitMinus(UO, PromotionType);
+ case UO_Plus:
+ return VisitPlus(UO, PromotionType);
+ default:
+ break;
+ }
+ }
+ auto result = Visit(const_cast<Expr *>(E));
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedValue(result, PromotionType);
+ else
+ return result;
+}
+
+ComplexPairTy CodeGenFunction::EmitPromotedComplexExpr(const Expr *E,
+ QualType DstTy) {
+ return ComplexExprEmitter(*this).EmitPromoted(E, DstTy);
+}
+
+ComplexPairTy
+ComplexExprEmitter::EmitPromotedComplexOperand(const Expr *E,
+ QualType OverallPromotionType) {
+ if (E->getType()->isAnyComplexType()) {
+ if (!OverallPromotionType.isNull())
+ return CGF.EmitPromotedComplexExpr(E, OverallPromotionType);
+ else
+ return Visit(const_cast<Expr *>(E));
+ } else {
+ if (!OverallPromotionType.isNull()) {
+ QualType ComplexElementTy =
+ OverallPromotionType->castAs<ComplexType>()->getElementType();
+ return ComplexPairTy(CGF.EmitPromotedScalarExpr(E, ComplexElementTy),
+ nullptr);
+ } else {
+ return ComplexPairTy(CGF.EmitScalarExpr(E), nullptr);
+ }
+ }
+}
+
ComplexExprEmitter::BinOpInfo
-ComplexExprEmitter::EmitBinOps(const BinaryOperator *E) {
+ComplexExprEmitter::EmitBinOps(const BinaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreReal();
TestAndClearIgnoreImag();
BinOpInfo Ops;
- if (E->getLHS()->getType()->isRealFloatingType())
- Ops.LHS = ComplexPairTy(CGF.EmitScalarExpr(E->getLHS()), nullptr);
- else
- Ops.LHS = Visit(E->getLHS());
- if (E->getRHS()->getType()->isRealFloatingType())
- Ops.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
- else
- Ops.RHS = Visit(E->getRHS());
- Ops.Ty = E->getType();
+ Ops.LHS = EmitPromotedComplexOperand(E->getLHS(), PromotionType);
+ Ops.RHS = EmitPromotedComplexOperand(E->getRHS(), PromotionType);
+ if (!PromotionType.isNull())
+ Ops.Ty = PromotionType;
+ else
+ Ops.Ty = E->getType();
return Ops;
}
@@ -911,35 +1050,66 @@ EmitCompoundAssignLValue(const CompoundAssignOperator *E,
// Load the RHS and LHS operands.
// __block variables need to have the rhs evaluated first, plus this should
// improve codegen a little.
- OpInfo.Ty = E->getComputationResultType();
+ QualType PromotionTypeCR;
+ PromotionTypeCR = getPromotionType(E->getComputationResultType());
+ if (PromotionTypeCR.isNull())
+ PromotionTypeCR = E->getComputationResultType();
+ OpInfo.Ty = PromotionTypeCR;
QualType ComplexElementTy = cast<ComplexType>(OpInfo.Ty)->getElementType();
+ QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType());
// The RHS should have been converted to the computation type.
if (E->getRHS()->getType()->isRealFloatingType()) {
- assert(
- CGF.getContext()
- .hasSameUnqualifiedType(ComplexElementTy, E->getRHS()->getType()));
- OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+ if (!PromotionTypeRHS.isNull())
+ OpInfo.RHS = ComplexPairTy(
+ CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS), nullptr);
+ else {
+ assert(CGF.getContext().hasSameUnqualifiedType(ComplexElementTy,
+ E->getRHS()->getType()));
+
+ OpInfo.RHS = ComplexPairTy(CGF.EmitScalarExpr(E->getRHS()), nullptr);
+ }
} else {
- assert(CGF.getContext()
- .hasSameUnqualifiedType(OpInfo.Ty, E->getRHS()->getType()));
- OpInfo.RHS = Visit(E->getRHS());
+ if (!PromotionTypeRHS.isNull()) {
+ OpInfo.RHS = ComplexPairTy(
+ CGF.EmitPromotedComplexExpr(E->getRHS(), PromotionTypeRHS));
+ } else {
+ assert(CGF.getContext().hasSameUnqualifiedType(OpInfo.Ty,
+ E->getRHS()->getType()));
+ OpInfo.RHS = Visit(E->getRHS());
+ }
}
LValue LHS = CGF.EmitLValue(E->getLHS());
// Load from the l-value and convert it.
SourceLocation Loc = E->getExprLoc();
+ QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType());
if (LHSTy->isAnyComplexType()) {
ComplexPairTy LHSVal = EmitLoadOfLValue(LHS, Loc);
- OpInfo.LHS = EmitComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
+ if (!PromotionTypeLHS.isNull())
+ OpInfo.LHS =
+ EmitComplexToComplexCast(LHSVal, LHSTy, PromotionTypeLHS, Loc);
+ else
+ OpInfo.LHS = EmitComplexToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
} else {
llvm::Value *LHSVal = CGF.EmitLoadOfScalar(LHS, Loc);
// For floating point real operands we can directly pass the scalar form
// to the binary operator emission and potentially get more efficient code.
if (LHSTy->isRealFloatingType()) {
- if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy))
- LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc);
+ QualType PromotedComplexElementTy;
+ if (!PromotionTypeLHS.isNull()) {
+ PromotedComplexElementTy =
+ cast<ComplexType>(PromotionTypeLHS)->getElementType();
+ if (!CGF.getContext().hasSameUnqualifiedType(PromotedComplexElementTy,
+ PromotionTypeLHS))
+ LHSVal = CGF.EmitScalarConversion(LHSVal, LHSTy,
+ PromotedComplexElementTy, Loc);
+ } else {
+ if (!CGF.getContext().hasSameUnqualifiedType(ComplexElementTy, LHSTy))
+ LHSVal =
+ CGF.EmitScalarConversion(LHSVal, LHSTy, ComplexElementTy, Loc);
+ }
OpInfo.LHS = ComplexPairTy(LHSVal, nullptr);
} else {
OpInfo.LHS = EmitScalarToComplexCast(LHSVal, LHSTy, OpInfo.Ty, Loc);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 8b2bbbb77b3c8..9def1285fbc1d 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -619,16 +619,22 @@ class ScalarExprEmitter
return Visit(E->getSubExpr()); // the actual value should be unused
return EmitLoadOfLValue(E);
}
- Value *VisitUnaryPlus(const UnaryOperator *E) {
-    // This differs from gcc, though, most likely due to a bug in gcc.
- TestAndClearIgnoreResultAssign();
- return Visit(E->getSubExpr());
- }
- Value *VisitUnaryMinus (const UnaryOperator *E);
+
+ Value *VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitPlus(const UnaryOperator *E, QualType PromotionType);
+ Value *VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitMinus(const UnaryOperator *E, QualType PromotionType);
+
Value *VisitUnaryNot (const UnaryOperator *E);
Value *VisitUnaryLNot (const UnaryOperator *E);
- Value *VisitUnaryReal (const UnaryOperator *E);
- Value *VisitUnaryImag (const UnaryOperator *E);
+ Value *VisitUnaryReal(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitReal(const UnaryOperator *E, QualType PromotionType);
+ Value *VisitUnaryImag(const UnaryOperator *E,
+ QualType PromotionType = QualType());
+ Value *VisitImag(const UnaryOperator *E, QualType PromotionType);
Value *VisitUnaryExtension(const UnaryOperator *E) {
return Visit(E->getSubExpr());
}
@@ -790,7 +796,13 @@ class ScalarExprEmitter
// Helper functions for fixed point binary operations.
Value *EmitFixedPointBinOp(const BinOpInfo &Ops);
- BinOpInfo EmitBinOps(const BinaryOperator *E);
+ BinOpInfo EmitBinOps(const BinaryOperator *E,
+ QualType PromotionTy = QualType());
+
+ Value *EmitPromotedValue(Value *result, QualType PromotionType);
+ Value *EmitUnPromotedValue(Value *result, QualType ExprType);
+ Value *EmitPromoted(const Expr *E, QualType PromotionType);
+
LValue EmitCompoundAssignLValue(const CompoundAssignOperator *E,
Value *(ScalarExprEmitter::*F)(const BinOpInfo &),
Value *&Result);
@@ -798,13 +810,30 @@ class ScalarExprEmitter
Value *EmitCompoundAssign(const CompoundAssignOperator *E,
Value *(ScalarExprEmitter::*F)(const BinOpInfo &));
+ QualType getPromotionType(QualType Ty) {
+ if (CGF.getTarget().shouldEmitFloat16WithExcessPrecision()) {
+ if (Ty->isAnyComplexType()) {
+ QualType ElementType = Ty->castAs<ComplexType>()->getElementType();
+ if (ElementType->isFloat16Type())
+ return CGF.getContext().getComplexType(CGF.getContext().FloatTy);
+ }
+ if (Ty->isFloat16Type())
+ return CGF.getContext().FloatTy;
+ }
+ return QualType();
+ }
+
// Binary operators and binary compound assignment operators.
-#define HANDLEBINOP(OP) \
- Value *VisitBin ## OP(const BinaryOperator *E) { \
- return Emit ## OP(EmitBinOps(E)); \
- } \
- Value *VisitBin ## OP ## Assign(const CompoundAssignOperator *E) { \
- return EmitCompoundAssign(E, &ScalarExprEmitter::Emit ## OP); \
+#define HANDLEBINOP(OP) \
+ Value *VisitBin##OP(const BinaryOperator *E) { \
+ QualType promotionTy = getPromotionType(E->getType()); \
+ auto result = Emit##OP(EmitBinOps(E, promotionTy)); \
+ if (result && !promotionTy.isNull()) \
+ result = EmitUnPromotedValue(result, E->getType()); \
+ return result; \
+ } \
+ Value *VisitBin##OP##Assign(const CompoundAssignOperator *E) { \
+ return EmitCompoundAssign(E, &ScalarExprEmitter::Emit##OP); \
}
HANDLEBINOP(Mul)
HANDLEBINOP(Div)
@@ -2817,10 +2846,45 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
}
+Value *ScalarExprEmitter::VisitUnaryPlus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitPlus(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
-Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E) {
+Value *ScalarExprEmitter::VisitPlus(const UnaryOperator *E,
+ QualType PromotionType) {
+  // This differs from gcc, though, most likely due to a bug in gcc.
TestAndClearIgnoreResultAssign();
- Value *Op = Visit(E->getSubExpr());
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedScalarExpr(E->getSubExpr(), PromotionType);
+ return Visit(E->getSubExpr());
+}
+
+Value *ScalarExprEmitter::VisitUnaryMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitMinus(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
+
+Value *ScalarExprEmitter::VisitMinus(const UnaryOperator *E,
+ QualType PromotionType) {
+ TestAndClearIgnoreResultAssign();
+ Value *Op;
+ if (!PromotionType.isNull())
+ Op = CGF.EmitPromotedScalarExpr(E->getSubExpr(), PromotionType);
+ else
+ Op = Visit(E->getSubExpr());
// Generate a unary FNeg for FP ops.
if (Op->getType()->isFPOrFPVectorTy())
@@ -3005,33 +3069,75 @@ ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(
return Builder.getInt(E->EvaluateKnownConstInt(CGF.getContext()));
}
-Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E) {
+Value *ScalarExprEmitter::VisitUnaryReal(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitReal(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
+
+Value *ScalarExprEmitter::VisitReal(const UnaryOperator *E,
+ QualType PromotionType) {
Expr *Op = E->getSubExpr();
if (Op->getType()->isAnyComplexType()) {
// If it's an l-value, load through the appropriate subobject l-value.
// Note that we have to ask E because Op might be an l-value that
// this won't work for, e.g. an Obj-C property.
- if (E->isGLValue())
- return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
- E->getExprLoc()).getScalarVal();
-
+ if (E->isGLValue()) {
+ if (!PromotionType.isNull()) {
+ CodeGenFunction::ComplexPairTy result = CGF.EmitComplexExpr(
+ Op, /*IgnoreReal*/ IgnoreResultAssign, /*IgnoreImag*/ true);
+ if (result.first)
+ result.first = CGF.EmitPromotedValue(result, PromotionType).first;
+ return result.first;
+ } else {
+ return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+ .getScalarVal();
+ }
+ }
// Otherwise, calculate and project.
return CGF.EmitComplexExpr(Op, false, true).first;
}
+ if (!PromotionType.isNull())
+ return CGF.EmitPromotedScalarExpr(Op, PromotionType);
return Visit(Op);
}
-Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
+Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E,
+ QualType PromotionType) {
+ QualType promotionTy = PromotionType.isNull()
+ ? getPromotionType(E->getSubExpr()->getType())
+ : PromotionType;
+ Value *result = VisitImag(E, promotionTy);
+ if (result && !promotionTy.isNull())
+ result = EmitUnPromotedValue(result, E->getType());
+ return result;
+}
+
+Value *ScalarExprEmitter::VisitImag(const UnaryOperator *E,
+ QualType PromotionType) {
Expr *Op = E->getSubExpr();
if (Op->getType()->isAnyComplexType()) {
// If it's an l-value, load through the appropriate subobject l-value.
// Note that we have to ask E because Op might be an l-value that
// this won't work for, e.g. an Obj-C property.
- if (Op->isGLValue())
- return CGF.EmitLoadOfLValue(CGF.EmitLValue(E),
- E->getExprLoc()).getScalarVal();
-
+ if (Op->isGLValue()) {
+ if (!PromotionType.isNull()) {
+ CodeGenFunction::ComplexPairTy result = CGF.EmitComplexExpr(
+ Op, /*IgnoreReal*/ true, /*IgnoreImag*/ IgnoreResultAssign);
+ if (result.second)
+ result.second = CGF.EmitPromotedValue(result, PromotionType).second;
+ return result.second;
+ } else {
+ return CGF.EmitLoadOfLValue(CGF.EmitLValue(E), E->getExprLoc())
+ .getScalarVal();
+ }
+ }
// Otherwise, calculate and project.
return CGF.EmitComplexExpr(Op, true, false).second;
}
@@ -3040,8 +3146,12 @@ Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
// effects are evaluated, but not the actual value.
if (Op->isGLValue())
CGF.EmitLValue(Op);
+ else if (!PromotionType.isNull())
+ CGF.EmitPromotedScalarExpr(Op, PromotionType);
else
CGF.EmitScalarExpr(Op, true);
+ if (!PromotionType.isNull())
+ return llvm::Constant::getNullValue(ConvertType(PromotionType));
return llvm::Constant::getNullValue(ConvertType(E->getType()));
}
@@ -3049,12 +3159,65 @@ Value *ScalarExprEmitter::VisitUnaryImag(const UnaryOperator *E) {
// Binary Operators
//===----------------------------------------------------------------------===//
-BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) {
+Value *ScalarExprEmitter::EmitPromotedValue(Value *result,
+ QualType PromotionType) {
+ return CGF.Builder.CreateFPExt(result, ConvertType(PromotionType), "ext");
+}
+
+Value *ScalarExprEmitter::EmitUnPromotedValue(Value *result,
+ QualType ExprType) {
+ return CGF.Builder.CreateFPTrunc(result, ConvertType(ExprType), "unpromotion");
+}
+
+Value *ScalarExprEmitter::EmitPromoted(const Expr *E, QualType PromotionType) {
+ E = E->IgnoreParens();
+ if (auto BO = dyn_cast<BinaryOperator>(E)) {
+ switch (BO->getOpcode()) {
+#define HANDLE_BINOP(OP) \
+ case BO_##OP: \
+ return Emit##OP(EmitBinOps(BO, PromotionType));
+ HANDLE_BINOP(Add)
+ HANDLE_BINOP(Sub)
+ HANDLE_BINOP(Mul)
+ HANDLE_BINOP(Div)
+#undef HANDLE_BINOP
+ default:
+ break;
+ }
+ } else if (auto UO = dyn_cast<UnaryOperator>(E)) {
+ switch (UO->getOpcode()) {
+ case UO_Imag:
+ return VisitImag(UO, PromotionType);
+ case UO_Real:
+ return VisitReal(UO, PromotionType);
+ case UO_Minus:
+ return VisitMinus(UO, PromotionType);
+ case UO_Plus:
+ return VisitPlus(UO, PromotionType);
+ default:
+ break;
+ }
+ }
+ auto result = Visit(const_cast<Expr *>(E));
+ if (result) {
+ if (!PromotionType.isNull())
+ return EmitPromotedValue(result, PromotionType);
+ else
+ return EmitUnPromotedValue(result, E->getType());
+ }
+ return result;
+}
+
+BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E,
+ QualType PromotionType) {
TestAndClearIgnoreResultAssign();
BinOpInfo Result;
- Result.LHS = Visit(E->getLHS());
- Result.RHS = Visit(E->getRHS());
- Result.Ty = E->getType();
+ Result.LHS = CGF.EmitPromotedScalarExpr(E->getLHS(), PromotionType);
+ Result.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionType);
+ if (!PromotionType.isNull())
+ Result.Ty = PromotionType;
+ else
+ Result.Ty = E->getType();
Result.Opcode = E->getOpcode();
Result.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
Result.E = E;
@@ -3073,8 +3236,18 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
// Emit the RHS first. __block variables need to have the rhs evaluated
// first, plus this should improve codegen a little.
- OpInfo.RHS = Visit(E->getRHS());
- OpInfo.Ty = E->getComputationResultType();
+
+ QualType PromotionTypeCR;
+ PromotionTypeCR = getPromotionType(E->getComputationResultType());
+ if (PromotionTypeCR.isNull())
+ PromotionTypeCR = E->getComputationResultType();
+ QualType PromotionTypeLHS = getPromotionType(E->getComputationLHSType());
+ QualType PromotionTypeRHS = getPromotionType(E->getRHS()->getType());
+ if (!PromotionTypeRHS.isNull())
+ OpInfo.RHS = CGF.EmitPromotedScalarExpr(E->getRHS(), PromotionTypeRHS);
+ else
+ OpInfo.RHS = Visit(E->getRHS());
+ OpInfo.Ty = PromotionTypeCR;
OpInfo.Opcode = E->getOpcode();
OpInfo.FPFeatures = E->getFPFeaturesInEffect(CGF.getLangOpts());
OpInfo.E = E;
@@ -3153,16 +3326,20 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, OpInfo.FPFeatures);
SourceLocation Loc = E->getExprLoc();
- OpInfo.LHS =
- EmitScalarConversion(OpInfo.LHS, LHSTy, E->getComputationLHSType(), Loc);
+ if (!PromotionTypeLHS.isNull())
+ OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, PromotionTypeLHS,
+ E->getExprLoc());
+ else
+ OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
+ E->getComputationLHSType(), Loc);
// Expand the binary operator.
Result = (this->*Func)(OpInfo);
// Convert the result back to the LHS type,
// potentially with Implicit Conversion sanitizer check.
- Result = EmitScalarConversion(Result, E->getComputationResultType(), LHSTy,
- Loc, ScalarConversionOpts(CGF.SanOpts));
+ Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc,
+ ScalarConversionOpts(CGF.SanOpts));
if (atomicPHI) {
llvm::BasicBlock *curBlock = Builder.GetInsertBlock();
@@ -4896,6 +5073,16 @@ Value *CodeGenFunction::EmitComplexToScalarConversion(ComplexPairTy Src,
}
+Value *
+CodeGenFunction::EmitPromotedScalarExpr(const Expr *E,
+ QualType PromotionType) {
+ if (!PromotionType.isNull())
+ return ScalarExprEmitter(*this).EmitPromoted(E, PromotionType);
+ else
+ return ScalarExprEmitter(*this).Visit(const_cast<Expr *>(E));
+}
+
+
llvm::Value *CodeGenFunction::
EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index cfa7e33401f49..b906fd0d1c121 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4415,6 +4415,11 @@ class CodeGenFunction : public CodeGenTypeCache {
/// EmitLoadOfComplex - Load a complex number from the specified l-value.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc);
+ ComplexPairTy EmitPromotedComplexExpr(const Expr *E, QualType PromotionType);
+ llvm::Value *EmitPromotedScalarExpr(const Expr *E, QualType PromotionType);
+ ComplexPairTy EmitPromotedValue(ComplexPairTy result, QualType PromotionType);
+ ComplexPairTy EmitUnPromotedValue(ComplexPairTy result, QualType PromotionType);
+
Address emitAddrOfRealComponent(Address complex, QualType complexType);
Address emitAddrOfImagComponent(Address complex, QualType complexType);
diff --git a/clang/test/CodeGen/X86/Float16-arithmetic.c b/clang/test/CodeGen/X86/Float16-arithmetic.c
index aa61f7cb3c65f..8fecc6459e6a9 100644
--- a/clang/test/CodeGen/X86/Float16-arithmetic.c
+++ b/clang/test/CodeGen/X86/Float16-arithmetic.c
@@ -1,7 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
-
// CHECK-LABEL: @add1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
@@ -9,9 +8,12 @@
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
-// CHECK-NEXT: ret half [[ADD]]
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
//
_Float16 add1(_Float16 a, _Float16 b) {
return a + b;
@@ -26,11 +28,15 @@ _Float16 add1(_Float16 a, _Float16 b) {
// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
-// CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ADD]], [[TMP2]]
-// CHECK-NEXT: ret half [[ADD1]]
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ADD]], [[EXT2]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD3]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
//
_Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
return a + b + c;
@@ -43,9 +49,12 @@ _Float16 add2(_Float16 a, _Float16 b, _Float16 c) {
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT: [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]]
-// CHECK-NEXT: ret half [[DIV]]
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[DIV:%.*]] = fdiv float [[EXT]], [[EXT1]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[DIV]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
//
_Float16 div(_Float16 a, _Float16 b) {
return a / b;
@@ -58,9 +67,12 @@ _Float16 div(_Float16 a, _Float16 b) {
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
-// CHECK-NEXT: ret half [[MUL]]
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
//
_Float16 mul(_Float16 a, _Float16 b) {
return a * b;
@@ -77,13 +89,18 @@ _Float16 mul(_Float16 a, _Float16 b) {
// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
// CHECK-NEXT: store half [[D:%.*]], ptr [[D_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float
// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[D_ADDR]], align 2
-// CHECK-NEXT: [[MUL1:%.*]] = fmul half [[TMP2]], [[TMP3]]
-// CHECK-NEXT: [[ADD:%.*]] = fadd half [[MUL]], [[MUL1]]
-// CHECK-NEXT: ret half [[ADD]]
+// CHECK-NEXT: [[EXT3:%.*]] = fpext half [[TMP3]] to float
+// CHECK-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]]
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
//
_Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
return a * b + c * d;
@@ -94,19 +111,397 @@ _Float16 add_and_mul1(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca half, align 2
// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
// CHECK-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
-// CHECK-NEXT: store half [[D:%.*]], ptr [[D_ADDR]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
-// CHECK-NEXT: [[MUL:%.*]] = fmul half 0xH4600, [[TMP1]]
-// CHECK-NEXT: [[SUB:%.*]] = fsub half [[TMP0]], [[MUL]]
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[MUL:%.*]] = fmul float 6.000000e+00, [[EXT1]]
+// CHECK-NEXT: [[SUB:%.*]] = fsub float [[EXT]], [[MUL]]
// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
-// CHECK-NEXT: [[ADD:%.*]] = fadd half [[SUB]], [[TMP2]]
-// CHECK-NEXT: ret half [[ADD]]
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[SUB]], [[EXT2]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
//
-_Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
+_Float16 add_and_mul2(_Float16 a, _Float16 b, _Float16 c) {
return (a - 6 * b) + c;
}
+
+// CHECK-LABEL: @addcompound(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[EXT]]
+// CHECK-NEXT: [[CONV1:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT: store half [[CONV1]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: ret half [[TMP2]]
+//
+_Float16 addcompound(_Float16 a, _Float16 c) {
+ c += a;
+ return c;
+}
+
+// CHECK-LABEL: @mulcompound_int_float16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float
+// CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[EXT]]
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[MUL]] to i32
+// CHECK-NEXT: store i32 [[CONV1]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: ret half [[TMP2]]
+//
+_Float16 mulcompound_int_float16(int a, _Float16 c) {
+ a *= c;
+ return c;
+}
+
+// CHECK-LABEL: @mulcompound_float_float16c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// CHECK-NEXT: store float [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// CHECK-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to float
+// CHECK-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to float
+// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[MUL_RL:%.*]] = fmul float [[TMP0]], [[CONV]]
+// CHECK-NEXT: [[MUL_IR:%.*]] = fmul float [[TMP0]], [[CONV1]]
+// CHECK-NEXT: store float [[MUL_RL]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[C_REALP2:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[C_REAL3:%.*]] = load half, ptr [[C_REALP2]], align 2
+// CHECK-NEXT: ret half [[C_REAL3]]
+//
+_Float16 mulcompound_float_float16c(float a, _Float16 _Complex c) {
+ a *= c;
+ return c;
+}
+
+// CHECK-LABEL: @RealOp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 RealOp(_Float16 a) {
+ return __real a;
+}
+
+// CHECK-LABEL: @RealOp_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 RealOp_c(_Float16 _Complex a) {
+ return __real a;
+}
+
+// CHECK-LABEL: @ImagOp(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: ret half 0xH0000
+//
+_Float16 ImagOp(_Float16 a) {
+ return __imag a;
+}
+
+// CHECK-LABEL: @ImagOp_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 ImagOp_c(_Float16 _Complex a) {
+ return __imag a;
+}
+
+// CHECK-LABEL: @MinusOp_r(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[FNEG:%.*]] = fneg float [[EXT]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[FNEG]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 MinusOp_r(_Float16 a) {
+ return -a;
+}
+
+// CHECK-LABEL: @MinusOp_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[NEG_R:%.*]] = fneg float [[EXT]]
+// CHECK-NEXT: [[NEG_I:%.*]] = fneg float [[EXT1]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[NEG_R]] to half
+// CHECK-NEXT: [[UNPROMOTION2:%.*]] = fptrunc float [[NEG_I]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 MinusOp_c(_Float16 _Complex a) {
+ return -a;
+}
+
+// CHECK-LABEL: @PlusOp_r(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 PlusOp_r(_Float16 a) {
+ return +a;
+}
+
+// CHECK-LABEL: @PlusOp_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// CHECK-NEXT: [[UNPROMOTION2:%.*]] = fptrunc float [[EXT1]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 PlusOp_c(_Float16 _Complex a) {
+ return +a;
+}
+
+// CHECK-LABEL: @MinusOp_r_r(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[FNEG:%.*]] = fneg float [[EXT1]]
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[FNEG]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 MinusOp_r_r(_Float16 a, _Float16 c) {
+ return a + -c;
+}
+
+// CHECK-LABEL: @MinusOp_c_r(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[FNEG:%.*]] = fneg float [[EXT2]]
+// CHECK-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[FNEG]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// CHECK-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 MinusOp_c_r(_Float16 _Complex a, _Float16 c) {
+ return a + -c;
+}
+
+// CHECK-LABEL: @MinusOp_r_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[C_REAL]] to float
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[C_IMAG]] to float
+// CHECK-NEXT: [[NEG_R:%.*]] = fneg float [[EXT1]]
+// CHECK-NEXT: [[NEG_I:%.*]] = fneg float [[EXT2]]
+// CHECK-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[NEG_R]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// CHECK-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[NEG_I]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 MinusOp_r_c(_Float16 a, _Float16 _Complex c) {
+ return a + -c;
+}
+
+// CHECK-LABEL: @MinusOp_c_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[C_REAL]] to float
+// CHECK-NEXT: [[EXT3:%.*]] = fpext half [[C_IMAG]] to float
+// CHECK-NEXT: [[NEG_R:%.*]] = fneg float [[EXT2]]
+// CHECK-NEXT: [[NEG_I:%.*]] = fneg float [[EXT3]]
+// CHECK-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[NEG_R]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[NEG_I]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// CHECK-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[ADD_I]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 MinusOp_c_c(_Float16 _Complex a, _Float16 _Complex c) {
+ return a + -c;
+}
+
+// CHECK-LABEL: @PlusOp_r_r(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// CHECK-NEXT: [[SUB:%.*]] = fsub float [[EXT]], [[EXT1]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 PlusOp_r_r(_Float16 a, _Float16 c) {
+ return a - +c;
+}
+
+// CHECK-LABEL: @PlusOp_c_r(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// CHECK-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 PlusOp_c_r(_Float16 _Complex a, _Float16 c) {
+ return a - +c;
+}
+
+// CHECK-LABEL: @PlusOp_r_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// CHECK-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// CHECK-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[C_REAL]] to float
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[C_IMAG]] to float
+// CHECK-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT1]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fneg float [[EXT2]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// CHECK-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[SUB_I]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 PlusOp_r_c(_Float16 a, _Float16 _Complex c) {
+ return a - +c;
+}
+
+// CHECK-LABEL: @PlusOp_c_c(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// CHECK-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// CHECK-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// CHECK-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// CHECK-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// CHECK-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// CHECK-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// CHECK-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// CHECK-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// CHECK-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// CHECK-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// CHECK-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// CHECK-NEXT: [[EXT2:%.*]] = fpext half [[C_REAL]] to float
+// CHECK-NEXT: [[EXT3:%.*]] = fpext half [[C_IMAG]] to float
+// CHECK-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// CHECK-NEXT: [[SUB_I:%.*]] = fsub float [[EXT1]], [[EXT3]]
+// CHECK-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// CHECK-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[SUB_I]] to half
+// CHECK-NEXT: ret half [[UNPROMOTION]]
+//
+_Float16 PlusOp_c_c(_Float16 _Complex a, _Float16 _Complex c) {
+ return a - +c;
+}
diff --git a/clang/test/CodeGen/X86/Float16-complex.c b/clang/test/CodeGen/X86/Float16-complex.c
index ebb290c976e7d..cf71d6287760c 100644
--- a/clang/test/CodeGen/X86/Float16-complex.c
+++ b/clang/test/CodeGen/X86/Float16-complex.c
@@ -1,134 +1,1945 @@
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86
-// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefix=X86
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefixes=AVX
+// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=X86
+// AVX-LABEL: @add_half_rr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @add_half_rr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
_Float16 _Complex add_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @add_half_rr(
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
return a + b;
}
+
+// AVX-LABEL: @add_half_cr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[TMP0]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add_half_cr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex add_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @add_half_cr(
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
return a + b;
}
+
+// AVX-LABEL: @add_half_rc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[TMP0]], [[B_REAL]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[B_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add_half_rc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT2]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex add_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @add_half_rc(
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
return a + b;
}
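A detail worth noting in both mixed cases (add_half_cr and add_half_rc): the complex operand's untouched imaginary component still takes an fpext/fptrunc round trip (EXT2 and UNPROMOTION3 above). That round trip is value-preserving, because every half value is exactly representable in float, so no extra rounding occurs. As a sketch in C (using the __real__/__imag__ GNU extensions Clang accepts; the helper name is hypothetical):

    _Float16 _Complex add_half_rc_promoted(_Float16 a, _Float16 _Complex b) {
      float re = (float)a + (float)__real__ b; /* only the real parts are added */
      float im = (float)__imag__ b;            /* widened, then narrowed back */
      _Float16 _Complex r;
      __real__ r = (_Float16)re;
      __imag__ r = (_Float16)im;
      return r;
    }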
+
+// AVX-LABEL: @add_half_cc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @add_half_cc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[EXT3]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
_Float16 _Complex add_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @add_half_cc(
- // X86: fadd
- // X86: fadd
- // X86-NOT: fadd
- // X86: ret
return a + b;
}
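Complex-by-complex addition is purely componentwise: two fadds in float, exactly two truncations. A sketch of the promoted computation (same assumptions as above):

    _Float16 _Complex add_half_cc_promoted(_Float16 _Complex a,
                                           _Float16 _Complex b) {
      float re = (float)__real__ a + (float)__real__ b;
      float im = (float)__imag__ a + (float)__imag__ b;
      _Float16 _Complex r;
      __real__ r = (_Float16)re;  /* UNPROMOTION */
      __imag__ r = (_Float16)im;  /* UNPROMOTION4 */
      return r;
    }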
+// AVX-LABEL: @add2_half_rrr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
+// AVX-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[ADD1:%.*]] = fadd half [[ADD]], [[TMP2]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP3]]
+//
+// X86-LABEL: @add2_half_rrr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP2]] to float
+// X86-NEXT: [[ADD3:%.*]] = fadd float [[ADD]], [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD3]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP3]]
+//
+_Float16 _Complex add2_half_rrr(_Float16 a, _Float16 b, _Float16 c) {
+ return a + b + c;
+}
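This three-operand test is where the expression granularity in the patch title becomes visible: on the X86 path both fadds stay in float (ADD, then ADD3) and only the final result is truncated, following the C standard's excess-precision rules rather than rounding after every operation. The two strategies can differ in the last bit of the half result. Sketched side by side (hypothetical names, assumes _Float16 support):

    /* What the patch emits: evaluate in float, round once. */
    _Float16 add3_excess(_Float16 a, _Float16 b, _Float16 c) {
      return (_Float16)((float)a + (float)b + (float)c);
    }

    /* Strict operation-by-operation emulation: rounds twice. */
    _Float16 add3_strict(_Float16 a, _Float16 b, _Float16 c) {
      _Float16 t = (_Float16)((float)a + (float)b);
      return (_Float16)((float)t + (float)c);
    }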
+
+// AVX-LABEL: @add2_half_rcr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[TMP0]], [[B_REAL]]
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[ADD_R1:%.*]] = fadd half [[ADD_R]], [[TMP1]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[B_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @add2_half_rcr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[ADD_R4:%.*]] = fadd float [[ADD_R]], [[EXT3]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R4]] to half
+// X86-NEXT: [[UNPROMOTION5:%.*]] = fptrunc float [[EXT2]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
+_Float16 _Complex add2_half_rcr(_Float16 a, _Float16 _Complex b, _Float16 c) {
+ return a + b + c;
+}
+
+// AVX-LABEL: @add2_half_rcc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[TMP0]], [[B_REAL]]
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R1:%.*]] = fadd half [[ADD_R]], [[C_REAL]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[B_IMAG]], [[C_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add2_half_rcc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT1]]
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[EXT4:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[ADD_R5:%.*]] = fadd float [[ADD_R]], [[EXT3]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT2]], [[EXT4]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R5]] to half
+// X86-NEXT: [[UNPROMOTION6:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION6]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex add2_half_rcc(_Float16 a, _Float16 _Complex b, _Float16 _Complex c) {
+ return a + b + c;
+}
+
+// AVX-LABEL: @add2_half_crr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[TMP0]]
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[ADD_R1:%.*]] = fadd half [[ADD_R]], [[TMP1]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @add2_half_crr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[ADD_R4:%.*]] = fadd float [[ADD_R]], [[EXT3]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R4]] to half
+// X86-NEXT: [[UNPROMOTION5:%.*]] = fptrunc float [[EXT1]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
+_Float16 _Complex add2_half_crr(_Float16 _Complex a, _Float16 b, _Float16 c) {
+ return a + b + c;
+}
+
+// AVX-LABEL: @add2_half_ccr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[ADD_R1:%.*]] = fadd half [[ADD_R]], [[TMP0]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add2_half_ccr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[EXT3]]
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[EXT4:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[ADD_R5:%.*]] = fadd float [[ADD_R]], [[EXT4]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R5]] to half
+// X86-NEXT: [[UNPROMOTION6:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION6]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex add2_half_ccr(_Float16 _Complex a, _Float16 _Complex b, _Float16 c) {
+ return a + b + c;
+}
+
+// AVX-LABEL: @add2_half_crc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[TMP0]]
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R1:%.*]] = fadd half [[ADD_R]], [[C_REAL]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[C_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @add2_half_crc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[EXT4:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[ADD_R5:%.*]] = fadd float [[ADD_R]], [[EXT3]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[EXT4]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R5]] to half
+// X86-NEXT: [[UNPROMOTION6:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION6]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex add2_half_crc(_Float16 _Complex a, _Float16 b, _Float16 _Complex c) {
+ return a + b + c;
+}
+
+// AVX-LABEL: @add2_half_ccc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R1:%.*]] = fadd half [[ADD_R]], [[C_REAL]]
+// AVX-NEXT: [[ADD_I2:%.*]] = fadd half [[ADD_I]], [[C_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[ADD_I2]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @add2_half_ccc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[EXT2]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[EXT3]]
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[EXT4:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[EXT5:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[ADD_R6:%.*]] = fadd float [[ADD_R]], [[EXT4]]
+// X86-NEXT: [[ADD_I7:%.*]] = fadd float [[ADD_I]], [[EXT5]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R6]] to half
+// X86-NEXT: [[UNPROMOTION8:%.*]] = fptrunc float [[ADD_I7]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION8]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex add2_half_ccc(_Float16 _Complex a, _Float16 _Complex b, _Float16 _Complex c) {
+ return a + b + c;
+}
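Across all seven operand-shape combinations of the add2 tests the shape is the same: the real chain and the imaginary chain each accumulate in float and are narrowed exactly once, at the end of the full expression. For the ccc case that amounts to (sketch, same assumptions as the earlier ones):

    _Float16 _Complex add3_ccc_promoted(_Float16 _Complex a,
                                        _Float16 _Complex b,
                                        _Float16 _Complex c) {
      float re = (float)__real__ a + (float)__real__ b + (float)__real__ c;
      float im = (float)__imag__ a + (float)__imag__ b + (float)__imag__ c;
      _Float16 _Complex r;
      __real__ r = (_Float16)re;
      __imag__ r = (_Float16)im;
      return r;
    }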
+
+// AVX-LABEL: @sub_half_rr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[SUB:%.*]] = fsub half [[TMP0]], [[TMP1]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[SUB]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @sub_half_rr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[SUB:%.*]] = fsub float [[EXT]], [[EXT1]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
_Float16 _Complex sub_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @sub_half_rr(
- // X86: fsub
- // X86-NOT: fsub
- // X86: ret
return a - b;
}
+
+// AVX-LABEL: @sub_half_cr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[A_REAL]], [[TMP0]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @sub_half_cr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[EXT1]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex sub_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @sub_half_cr(
- // X86: fsub
- // X86-NOT: fsub
- // X86: ret
return a - b;
}
+
+// AVX-LABEL: @sub_half_rc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[TMP0]], [[B_REAL]]
+// AVX-NEXT: [[SUB_I:%.*]] = fneg half [[B_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @sub_half_rc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT1]]
+// X86-NEXT: [[SUB_I:%.*]] = fneg float [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[SUB_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex sub_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @sub_half_rc(
- // X86: fsub
- // X86: fneg
- // X86-NOT: fsub
- // X86: ret
return a - b;
}
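Subtraction with a complex right-hand side also negates that operand's imaginary part, and in the promoted path the fneg is applied to the widened float value before the single truncation (negation is exact in either width, so this cannot change the result). Sketch:

    _Float16 _Complex sub_half_rc_promoted(_Float16 a, _Float16 _Complex b) {
      float re = (float)a - (float)__real__ b;
      float im = -(float)__imag__ b;  /* fneg in float */
      _Float16 _Complex r;
      __real__ r = (_Float16)re;
      __imag__ r = (_Float16)im;
      return r;
    }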
+
+// AVX-LABEL: @sub_half_cc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT: [[SUB_I:%.*]] = fsub half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @sub_half_cc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// X86-NEXT: [[SUB_I:%.*]] = fsub float [[EXT1]], [[EXT3]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[SUB_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
_Float16 _Complex sub_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @sub_half_cc(
- // X86: fsub
- // X86: fsub
- // X86-NOT: fsub
- // X86: ret
return a - b;
}
+// AVX-LABEL: @mul_half_rr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[MUL:%.*]] = fmul half [[TMP0]], [[TMP1]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[MUL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @mul_half_rr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
_Float16 _Complex mul_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @mul_half_rr(
- // X86: fmul
- // X86-NOT: fmul
- // X86: ret
return a * b;
}
+
+// AVX-LABEL: @mul_half_cr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[MUL_RL:%.*]] = fmul half [[A_REAL]], [[TMP0]]
+// AVX-NEXT: [[MUL_IL:%.*]] = fmul half [[A_IMAG]], [[TMP0]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[MUL_RL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[MUL_IL]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @mul_half_cr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[MUL_RL:%.*]] = fmul float [[EXT]], [[EXT2]]
+// X86-NEXT: [[MUL_IL:%.*]] = fmul float [[EXT1]], [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_RL]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[MUL_IL]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex mul_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @mul_half_cr(
- // X86: fmul
- // X86: fmul
- // X86-NOT: fmul
- // X86: ret
return a * b;
}
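Complex-times-real needs none of the NaN checks or the __mulhc3 fallback that the full complex multiply below uses: it is just two independent multiplies, each widened and narrowed once on the X86 path. Sketch:

    _Float16 _Complex mul_half_cr_promoted(_Float16 _Complex a, _Float16 b) {
      float re = (float)__real__ a * (float)b;
      float im = (float)__imag__ a * (float)b;
      _Float16 _Complex r;
      __real__ r = (_Float16)re;
      __imag__ r = (_Float16)im;
      return r;
    }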
+
+// AVX-LABEL: @mul_half_rc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[MUL_RL:%.*]] = fmul half [[TMP0]], [[B_REAL]]
+// AVX-NEXT: [[MUL_IR:%.*]] = fmul half [[TMP0]], [[B_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[MUL_RL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[MUL_IR]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @mul_half_rc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[MUL_RL:%.*]] = fmul float [[EXT]], [[EXT1]]
+// X86-NEXT: [[MUL_IR:%.*]] = fmul float [[EXT]], [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[MUL_RL]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[MUL_IR]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex mul_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @mul_half_rc(
- // X86: fmul
- // X86: fmul
- // X86-NOT: fmul
- // X86: ret
return a * b;
}
+
+// AVX-LABEL: @mul_half_cc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[COERCE:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[MUL_AC:%.*]] = fmul half [[A_REAL]], [[B_REAL]]
+// AVX-NEXT: [[MUL_BD:%.*]] = fmul half [[A_IMAG]], [[B_IMAG]]
+// AVX-NEXT: [[MUL_AD:%.*]] = fmul half [[A_REAL]], [[B_IMAG]]
+// AVX-NEXT: [[MUL_BC:%.*]] = fmul half [[A_IMAG]], [[B_REAL]]
+// AVX-NEXT: [[MUL_R:%.*]] = fsub half [[MUL_AC]], [[MUL_BD]]
+// AVX-NEXT: [[MUL_I:%.*]] = fadd half [[MUL_AD]], [[MUL_BC]]
+// AVX-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno half [[MUL_R]], [[MUL_R]]
+// AVX-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]]
+// AVX: complex_mul_imag_nan:
+// AVX-NEXT: [[ISNAN_CMP1:%.*]] = fcmp uno half [[MUL_I]], [[MUL_I]]
+// AVX-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
+// AVX: complex_mul_libcall:
+// AVX-NEXT: [[CALL:%.*]] = call <2 x half> @__mulhc3(half noundef [[A_REAL]], half noundef [[A_IMAG]], half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1:[0-9]+]]
+// AVX-NEXT: store <2 x half> [[CALL]], ptr [[COERCE]], align 2
+// AVX-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0
+// AVX-NEXT: [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2
+// AVX-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1
+// AVX-NEXT: [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2
+// AVX-NEXT: br label [[COMPLEX_MUL_CONT]]
+// AVX: complex_mul_cont:
+// AVX-NEXT: [[REAL_MUL_PHI:%.*]] = phi half [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ]
+// AVX-NEXT: [[IMAG_MUL_PHI:%.*]] = phi half [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[REAL_MUL_PHI]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[IMAG_MUL_PHI]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @mul_half_cc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[MUL_AC:%.*]] = fmul float [[EXT]], [[EXT2]]
+// X86-NEXT: [[MUL_BD:%.*]] = fmul float [[EXT1]], [[EXT3]]
+// X86-NEXT: [[MUL_AD:%.*]] = fmul float [[EXT]], [[EXT3]]
+// X86-NEXT: [[MUL_BC:%.*]] = fmul float [[EXT1]], [[EXT2]]
+// X86-NEXT: [[MUL_R:%.*]] = fsub float [[MUL_AC]], [[MUL_BD]]
+// X86-NEXT: [[MUL_I:%.*]] = fadd float [[MUL_AD]], [[MUL_BC]]
+// X86-NEXT: [[ISNAN_CMP:%.*]] = fcmp uno float [[MUL_R]], [[MUL_R]]
+// X86-NEXT: br i1 [[ISNAN_CMP]], label [[COMPLEX_MUL_IMAG_NAN:%.*]], label [[COMPLEX_MUL_CONT:%.*]], !prof [[PROF2:![0-9]+]]
+// X86: complex_mul_imag_nan:
+// X86-NEXT: [[ISNAN_CMP4:%.*]] = fcmp uno float [[MUL_I]], [[MUL_I]]
+// X86-NEXT: br i1 [[ISNAN_CMP4]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
+// X86: complex_mul_libcall:
+// X86-NEXT: [[CALL:%.*]] = call <2 x float> @__mulsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR2:[0-9]+]]
+// X86-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4
+// X86-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0
+// X86-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4
+// X86-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1
+// X86-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4
+// X86-NEXT: br label [[COMPLEX_MUL_CONT]]
+// X86: complex_mul_cont:
+// X86-NEXT: [[REAL_MUL_PHI:%.*]] = phi float [ [[MUL_R]], [[ENTRY:%.*]] ], [ [[MUL_R]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_REAL]], [[COMPLEX_MUL_LIBCALL]] ]
+// X86-NEXT: [[IMAG_MUL_PHI:%.*]] = phi float [ [[MUL_I]], [[ENTRY]] ], [ [[MUL_I]], [[COMPLEX_MUL_IMAG_NAN]] ], [ [[COERCE_IMAG]], [[COMPLEX_MUL_LIBCALL]] ]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[REAL_MUL_PHI]] to half
+// X86-NEXT: [[UNPROMOTION5:%.*]] = fptrunc float [[IMAG_MUL_PHI]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION5]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
_Float16 _Complex mul_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @mul_half_cc(
- // X86: %[[AC:[^ ]+]] = fmul
- // X86: %[[BD:[^ ]+]] = fmul
- // X86: %[[AD:[^ ]+]] = fmul
- // X86: %[[BC:[^ ]+]] = fmul
- // X86: %[[RR:[^ ]+]] = fsub half %[[AC]], %[[BD]]
- // X86: %[[RI:[^ ]+]] = fadd half
- // X86-DAG: %[[AD]]
- // X86-DAG: ,
- // X86-DAG: %[[BC]]
- // X86: fcmp uno half %[[RR]]
- // X86: fcmp uno half %[[RI]]
- // X86: call {{.*}} @__mulhc3(
- // X86: ret
return a * b;
}
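
Both run lines above encode the same C99 Annex G contract for full complex multiplication: emit the four-multiply fast path inline, and only when both candidate components compare unordered with themselves (the pair of fcmp uno branches) fall back to the compiler-rt helper, __mulhc3 at half precision or __mulsc3 on the promoted floats. A hedged sketch of that control flow at float precision, with slow_mul as a hypothetical stand-in for the libcall:

  #include <math.h>

  float _Complex mul_annex_g(float ar, float ai, float br, float bi,
                             float _Complex (*slow_mul)(float, float,
                                                        float, float)) {
    float rr = ar * br - ai * bi;
    float ri = ar * bi + ai * br;
    if (isnan(rr) && isnan(ri))        /* both parts NaN: try to recover */
      return slow_mul(ar, ai, br, bi); /* e.g. infinite * finite operands */
    float _Complex r;
    __real r = rr;
    __imag r = ri;
    return r;
  }

Note that on the X86 path the fptrunc pair sits after the phi nodes, so even a result coming back from the libcall is rounded to half only once per component.
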
-
+// AVX-LABEL: @div_half_rr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[DIV:%.*]] = fdiv half [[TMP0]], [[TMP1]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[DIV]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @div_half_rr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[DIV:%.*]] = fdiv float [[EXT]], [[EXT1]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[DIV]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
_Float16 _Complex div_half_rr(_Float16 a, _Float16 b) {
- // X86-LABEL: @div_half_rr(
- // X86: fdiv
- // X86-NOT: fdiv
- // X86: ret
return a / b;
}
+
+// AVX-LABEL: @div_half_cr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = fdiv half [[A_REAL]], [[TMP0]]
+// AVX-NEXT: [[TMP2:%.*]] = fdiv half [[A_IMAG]], [[TMP0]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[TMP2]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP3]]
+//
+// X86-LABEL: @div_half_cr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store half [[B:%.*]], ptr [[B_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[B_ADDR]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = fdiv float [[EXT]], [[EXT2]]
+// X86-NEXT: [[TMP2:%.*]] = fdiv float [[EXT1]], [[EXT2]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[TMP1]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[TMP2]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP3]]
+//
_Float16 _Complex div_half_cr(_Float16 _Complex a, _Float16 b) {
- // X86-LABEL: @div_half_cr(
- // X86: fdiv
- // X86: fdiv
- // X86-NOT: fdiv
- // X86: ret
return a / b;
}
+// AVX-LABEL: @div_half_rc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[COERCE:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[CALL:%.*]] = call <2 x half> @__divhc3(half noundef [[TMP0]], half noundef 0xH0000, half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1]]
+// AVX-NEXT: store <2 x half> [[CALL]], ptr [[COERCE]], align 2
+// AVX-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0
+// AVX-NEXT: [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2
+// AVX-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1
+// AVX-NEXT: [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @div_half_rc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef 0.000000e+00, float noundef [[EXT1]], float noundef [[EXT2]]) #[[ATTR2]]
+// X86-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4
+// X86-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0
+// X86-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4
+// X86-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1
+// X86-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half
+// X86-NEXT: [[UNPROMOTION3:%.*]] = fptrunc float [[COERCE_IMAG]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION3]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
_Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) {
- // X86-LABEL: @div_half_rc(
- // X86-NOT: fdiv
- // X86: call {{.*}} @__divhc3(
- // X86: ret
return a / b;
}
+
+// AVX-LABEL: @div_half_cc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[COERCE:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// AVX-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// AVX-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// AVX-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// AVX-NEXT: [[CALL:%.*]] = call <2 x half> @__divhc3(half noundef [[A_REAL]], half noundef [[A_IMAG]], half noundef [[B_REAL]], half noundef [[B_IMAG]]) #[[ATTR1]]
+// AVX-NEXT: store <2 x half> [[CALL]], ptr [[COERCE]], align 2
+// AVX-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 0
+// AVX-NEXT: [[COERCE_REAL:%.*]] = load half, ptr [[COERCE_REALP]], align 2
+// AVX-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[COERCE]], i32 0, i32 1
+// AVX-NEXT: [[COERCE_IMAG:%.*]] = load half, ptr [[COERCE_IMAGP]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[COERCE_REAL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[COERCE_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @div_half_cc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[B:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[COERCE:%.*]] = alloca { float, float }, align 4
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[B_COERCE:%.*]], ptr [[B]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 0
+// X86-NEXT: [[B_REAL:%.*]] = load half, ptr [[B_REALP]], align 2
+// X86-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[B]], i32 0, i32 1
+// X86-NEXT: [[B_IMAG:%.*]] = load half, ptr [[B_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[B_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[B_IMAG]] to float
+// X86-NEXT: [[CALL:%.*]] = call <2 x float> @__divsc3(float noundef [[EXT]], float noundef [[EXT1]], float noundef [[EXT2]], float noundef [[EXT3]]) #[[ATTR2]]
+// X86-NEXT: store <2 x float> [[CALL]], ptr [[COERCE]], align 4
+// X86-NEXT: [[COERCE_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 0
+// X86-NEXT: [[COERCE_REAL:%.*]] = load float, ptr [[COERCE_REALP]], align 4
+// X86-NEXT: [[COERCE_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[COERCE]], i32 0, i32 1
+// X86-NEXT: [[COERCE_IMAG:%.*]] = load float, ptr [[COERCE_IMAGP]], align 4
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[COERCE_REAL]] to half
+// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[COERCE_IMAG]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
_Float16 _Complex div_half_cc(_Float16 _Complex a, _Float16 _Complex b) {
- // X86-LABEL: @div_half_cc(
- // X86-NOT: fdiv
- // X86: call {{.*}} @__divhc3(
- // X86: ret
return a / b;
}
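
Division never gets an inline fast path: the real-by-complex and complex-by-complex cases both go straight to the helper, __divhc3 when half arithmetic is legal and __divsc3 on the promoted floats otherwise, with the X86 side truncating each returned component once. A sketch of that promoted shape, with divsc3 as a hypothetical stand-in for the compiler-rt routine:

  _Float16 _Complex div_cc_sketch(_Float16 _Complex a, _Float16 _Complex b,
                                  float _Complex (*divsc3)(float, float,
                                                           float, float)) {
    float _Complex q = divsc3((float)__real a, (float)__imag a,
                              (float)__real b, (float)__imag b);
    _Float16 _Complex r;
    __real r = (_Float16)__real q;     /* one fptrunc per component */
    __imag r = (_Float16)__imag q;
    return r;
  }
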
+
+// AVX-LABEL: @addcompound_half_rr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[ADD:%.*]] = fadd half [[TMP1]], [[TMP0]]
+// AVX-NEXT: store half [[ADD]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP2]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP3]]
+//
+// X86-LABEL: @addcompound_half_rr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[CONV:%.*]] = fpext half [[TMP1]] to float
+// X86-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[EXT]]
+// X86-NEXT: [[CONV1:%.*]] = fptrunc float [[ADD]] to half
+// X86-NEXT: store half [[CONV1]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[TMP2]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP3]]
+//
+_Float16 _Complex addcompound_half_rr(_Float16 a, _Float16 c) {
+ c += a;
+ return c;
+}
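
These compound-assignment checks are where the statement-level granularity shows up most directly: on the X86 path both operands are widened, the addition happens once in float, and the only truncation is at the store back into c. Rewritten by hand (a sketch of what the IR above computes, not code from the patch):

  _Float16 addcompound_sketch(_Float16 a, _Float16 c) {
    float wa = (float)a;               /* fpext */
    float wc = (float)c;               /* fpext */
    c = (_Float16)(wc + wa);           /* one fadd float, one fptrunc */
    return c;
  }
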
+
+// AVX-LABEL: @addcompound_half_cr(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[TMP0]], [[A_REAL]]
+// AVX-NEXT: store half [[ADD_R]], ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP1]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP2]]
+//
+// X86-LABEL: @addcompound_half_cr(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store half [[C:%.*]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[CONV:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[CONV]], [[EXT]]
+// X86-NEXT: [[CONV2:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: store half [[CONV2]], ptr [[C_ADDR]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load half, ptr [[C_ADDR]], align 2
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[TMP1]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP2:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP2]]
+//
+_Float16 _Complex addcompound_half_cr(_Float16 _Complex a, _Float16 c) {
+ c += a;
+ return c;
+}
+
+// AVX-LABEL: @addcompound_half_rc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[C_REAL]], [[TMP0]]
+// AVX-NEXT: [[C_REALP1:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_IMAGP2:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R]], ptr [[C_REALP1]], align 2
+// AVX-NEXT: store half [[C_IMAG]], ptr [[C_IMAGP2]], align 2
+// AVX-NEXT: [[C_REALP3:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL4:%.*]] = load half, ptr [[C_REALP3]], align 2
+// AVX-NEXT: [[C_IMAGP5:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG6:%.*]] = load half, ptr [[C_IMAGP5]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[C_REAL4]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[C_IMAG6]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @addcompound_half_rc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[CONV]], [[EXT]]
+// X86-NEXT: [[CONV2:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: [[CONV3:%.*]] = fptrunc float [[CONV1]] to half
+// X86-NEXT: [[C_REALP4:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_IMAGP5:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: store half [[CONV2]], ptr [[C_REALP4]], align 2
+// X86-NEXT: store half [[CONV3]], ptr [[C_IMAGP5]], align 2
+// X86-NEXT: [[C_REALP6:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL7:%.*]] = load half, ptr [[C_REALP6]], align 2
+// X86-NEXT: [[C_IMAGP8:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG9:%.*]] = load half, ptr [[C_IMAGP8]], align 2
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[C_REAL7]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[C_IMAG9]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex addcompound_half_rc(_Float16 a, _Float16 _Complex c) {
+ c += a;
+ return c;
+}
+
+// AVX-LABEL: @addcompound_half_cc(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[C_REAL]], [[A_REAL]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[C_IMAG]], [[A_IMAG]]
+// AVX-NEXT: [[C_REALP1:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_IMAGP2:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R]], ptr [[C_REALP1]], align 2
+// AVX-NEXT: store half [[ADD_I]], ptr [[C_IMAGP2]], align 2
+// AVX-NEXT: [[C_REALP3:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL4:%.*]] = load half, ptr [[C_REALP3]], align 2
+// AVX-NEXT: [[C_IMAGP5:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG6:%.*]] = load half, ptr [[C_IMAGP5]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[C_REAL4]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[C_IMAG6]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @addcompound_half_cc(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[CONV2:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[CONV]], [[EXT]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[CONV2]], [[EXT1]]
+// X86-NEXT: [[CONV3:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: [[CONV4:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT: [[C_REALP5:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_IMAGP6:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: store half [[CONV3]], ptr [[C_REALP5]], align 2
+// X86-NEXT: store half [[CONV4]], ptr [[C_IMAGP6]], align 2
+// X86-NEXT: [[C_REALP7:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL8:%.*]] = load half, ptr [[C_REALP7]], align 2
+// X86-NEXT: [[C_IMAGP9:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG10:%.*]] = load half, ptr [[C_IMAGP9]], align 2
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[C_REAL8]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[C_IMAG10]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex addcompound_half_cc(_Float16 _Complex a, _Float16 _Complex c) {
+ c += a;
+ return c;
+}
+
+// AVX-LABEL: @MinusOp_r(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[FNEG:%.*]] = fneg half [[TMP0]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[FNEG]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @MinusOp_r(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[FNEG:%.*]] = fneg float [[EXT]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[FNEG]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex MinusOp_r(_Float16 a) {
+ return -a;
+}
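
Negation only flips the sign bit, so the float detour on the X86 path is harmless here: for any non-NaN input the two forms below return identical values (a sketch illustrating why, not code from the test):

  _Float16 neg_direct(_Float16 a)   { return -a; }                  /* AVX shape */
  _Float16 neg_promoted(_Float16 a) { return (_Float16)-(float)a; } /* X86 shape */
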
+
+// AVX-LABEL: @MinusOp_c(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[NEG_R:%.*]] = fneg half [[A_REAL]]
+// AVX-NEXT: [[NEG_I:%.*]] = fneg half [[A_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[NEG_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[NEG_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @MinusOp_c(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[NEG_R:%.*]] = fneg float [[EXT]]
+// X86-NEXT: [[NEG_I:%.*]] = fneg float [[EXT1]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[NEG_R]] to half
+// X86-NEXT: [[UNPROMOTION2:%.*]] = fptrunc float [[NEG_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION2]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex MinusOp_c(_Float16 _Complex a) {
+ return -a;
+}
+
+// AVX-LABEL: @PlusOp_r(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP0]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @PlusOp_r(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex PlusOp_r(_Float16 a) {
+ return +a;
+}
+
+// AVX-LABEL: @PlusOp_c(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[A_REAL]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[A_IMAG]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @PlusOp_c(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// X86-NEXT: [[UNPROMOTION2:%.*]] = fptrunc float [[EXT1]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION2]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex PlusOp_c(_Float16 _Complex a) {
+ return +a;
+}
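
Even the identity operator `+a` round-trips through fpext/fptrunc on the X86 side. Because float represents every _Float16 value exactly, the round trip is value-preserving for non-signaling inputs, and optimization passes are free to fold the pair away; it stays visible only in the unoptimized IR these checks match:

  _Float16 plus_sketch(_Float16 a) {
    return (_Float16)(float)a;         /* fpext, then fptrunc: same value back */
  }
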
+
+// AVX-LABEL: @RealOp_r(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP0]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @RealOp_r(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex RealOp_r(_Float16 a) {
+  return __real a;
+}
+
+// AVX-LABEL: @RealOp_c(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP0]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @RealOp_c(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex RealOp_c(_Float16 _Complex a) {
+ return __real a;
+}
+
+// AVX-LABEL: @ImagOp_r(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// AVX-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @ImagOp_r(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A_ADDR:%.*]] = alloca half, align 2
+// X86-NEXT: store half [[A:%.*]], ptr [[A_ADDR]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load half, ptr [[A_ADDR]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[TMP0]] to float
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP1]]
+//
+_Float16 _Complex ImagOp_r(_Float16 a) {
+ return __imag a;
+}
+
+// AVX-LABEL: @ImagOp_c(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[TMP0:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[TMP0]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP1]]
+//
+// X86-LABEL: @ImagOp_c(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[EXT]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half 0xH0000, ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex ImagOp_c(_Float16 _Complex a) {
+ return __imag a;
+}
+
+// AVX-LABEL: @MinusOp_c_c(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[NEG_R:%.*]] = fneg half [[C_REAL]]
+// AVX-NEXT: [[NEG_I:%.*]] = fneg half [[C_IMAG]]
+// AVX-NEXT: [[ADD_R:%.*]] = fadd half [[A_REAL]], [[NEG_R]]
+// AVX-NEXT: [[ADD_I:%.*]] = fadd half [[A_IMAG]], [[NEG_I]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[ADD_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[ADD_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @MinusOp_c_c(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[NEG_R:%.*]] = fneg float [[EXT2]]
+// X86-NEXT: [[NEG_I:%.*]] = fneg float [[EXT3]]
+// X86-NEXT: [[ADD_R:%.*]] = fadd float [[EXT]], [[NEG_R]]
+// X86-NEXT: [[ADD_I:%.*]] = fadd float [[EXT1]], [[NEG_I]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD_R]] to half
+// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[ADD_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex MinusOp_c_c(_Float16 _Complex a, _Float16 _Complex c) {
+ return a + -c;
+}
+
+// AVX-LABEL: @PlusOp_c_c(
+// AVX-NEXT: entry:
+// AVX-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// AVX-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// AVX-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// AVX-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// AVX-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// AVX-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// AVX-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// AVX-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// AVX-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// AVX-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// AVX-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// AVX-NEXT: [[SUB_R:%.*]] = fsub half [[A_REAL]], [[C_REAL]]
+// AVX-NEXT: [[SUB_I:%.*]] = fsub half [[A_IMAG]], [[C_IMAG]]
+// AVX-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// AVX-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// AVX-NEXT: store half [[SUB_R]], ptr [[RETVAL_REALP]], align 2
+// AVX-NEXT: store half [[SUB_I]], ptr [[RETVAL_IMAGP]], align 2
+// AVX-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// AVX-NEXT: ret <2 x half> [[TMP0]]
+//
+// X86-LABEL: @PlusOp_c_c(
+// X86-NEXT: entry:
+// X86-NEXT: [[RETVAL:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[A:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: [[C:%.*]] = alloca { half, half }, align 2
+// X86-NEXT: store <2 x half> [[A_COERCE:%.*]], ptr [[A]], align 2
+// X86-NEXT: store <2 x half> [[C_COERCE:%.*]], ptr [[C]], align 2
+// X86-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 0
+// X86-NEXT: [[A_REAL:%.*]] = load half, ptr [[A_REALP]], align 2
+// X86-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[A]], i32 0, i32 1
+// X86-NEXT: [[A_IMAG:%.*]] = load half, ptr [[A_IMAGP]], align 2
+// X86-NEXT: [[EXT:%.*]] = fpext half [[A_REAL]] to float
+// X86-NEXT: [[EXT1:%.*]] = fpext half [[A_IMAG]] to float
+// X86-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 0
+// X86-NEXT: [[C_REAL:%.*]] = load half, ptr [[C_REALP]], align 2
+// X86-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[C]], i32 0, i32 1
+// X86-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
+// X86-NEXT: [[EXT2:%.*]] = fpext half [[C_REAL]] to float
+// X86-NEXT: [[EXT3:%.*]] = fpext half [[C_IMAG]] to float
+// X86-NEXT: [[SUB_R:%.*]] = fsub float [[EXT]], [[EXT2]]
+// X86-NEXT: [[SUB_I:%.*]] = fsub float [[EXT1]], [[EXT3]]
+// X86-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[SUB_R]] to half
+// X86-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[SUB_I]] to half
+// X86-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 0
+// X86-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { half, half }, ptr [[RETVAL]], i32 0, i32 1
+// X86-NEXT: store half [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 2
+// X86-NEXT: store half [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 2
+// X86-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[RETVAL]], align 2
+// X86-NEXT: ret <2 x half> [[TMP0]]
+//
+_Float16 _Complex PlusOp_c_c(_Float16 _Complex a, _Float16 _Complex c) {
+ return a - +c;
+}
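
Taken together, the last two functions show how the unary sign operators combine with a binary operator under promotion: `a + -c` keeps the explicit fneg before the fadd, while `a - +c` folds into a plain fsub, and on the X86 side all of the arithmetic happens in float with a single fptrunc pair at the end. The two spellings compute the same values, since IEEE-754 defines x - y as x + (-y); a sketch:

  _Float16 _Complex minus_forms(_Float16 _Complex a, _Float16 _Complex c) {
    _Float16 _Complex r1 = a + -c;     /* fneg + fadd per component */
    _Float16 _Complex r2 = a - +c;     /* fsub per component */
    (void)r2;                          /* agrees with r1 for all non-NaN inputs */
    return r1;
  }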