[clang] users/banach space/clang/arm builtins extra args 2 (PR #181974)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 18 00:49:42 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
@llvm/pr-subscribers-clang-codegen
Author: Andrzej Warzyński (banach-space)
<details>
<summary>Changes</summary>
- **[clang][ARM] Refactor argument handling in `EmitAArch64BuiltinExpr` (1/2) (NFC)**
- **[clang][ARM] Refactor argument handling in `EmitAArch64BuiltinExpr` (2/2) (NFC)**
---
Patch is 40.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/181974.diff
3 Files Affected:
- (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+85-146)
- (modified) clang/lib/Sema/SemaARM.cpp (+3-1)
- (modified) clang/test/CodeGen/arm64-microsoft-intrinsics.c (+16-16)
``````````diff
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index cb6bbfe07538e..5032903a850c9 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -446,9 +446,8 @@ Value *CodeGenFunction::EmitFP8NeonCall(unsigned IID,
ArrayRef<llvm::Type *> Tys,
SmallVectorImpl<Value *> &Ops,
const CallExpr *E, const char *name) {
- llvm::Value *FPM =
- EmitScalarOrConstFoldImmArg(/* ICEArguments */ 0, E->getNumArgs() - 1, E);
- Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
+ Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
+ Ops.pop_back_val());
return EmitNeonCall(CGM.getIntrinsic(IID, Tys), Ops, name);
}
@@ -2709,50 +2708,38 @@ static Value *EmitRangePrefetchBuiltin(CodeGenFunction &CGF, unsigned BuiltinID,
}
/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
-/// argument that specifies the vector type.
+/// argument that specifies the vector type. The additional argument is meant
+/// for Sema checking (see `CheckNeonBuiltinFunctionCall`) and this function
+/// should be kept consistent with the logic in Sema.
+/// TODO: Make this return false for SISD builtins.
static bool HasExtraNeonArgument(unsigned BuiltinID) {
+ // Required by the headers included below, but not in this particular
+ // function.
+ int PtrArgNum = -1;
+ bool HasConstPtr = false;
+
+ // The mask encodes the type. We don't care about the actual value. Instead,
+ // we just check whether it has been set.
+ uint64_t mask = 0;
+ switch (BuiltinID) {
+#define GET_NEON_OVERLOAD_CHECK
+#include "clang/Basic/arm_fp16.inc"
+#include "clang/Basic/arm_neon.inc"
+#undef GET_NEON_OVERLOAD_CHECK
+ // Non-neon builtins for controlling VFP that take an extra argument for
+ // discriminating the type.
+ case ARM::BI__builtin_arm_vcvtr_f:
+ case ARM::BI__builtin_arm_vcvtr_d:
+ mask = 1;
+ }
switch (BuiltinID) {
default: break;
- case NEON::BI__builtin_neon_vget_lane_i8:
- case NEON::BI__builtin_neon_vget_lane_i16:
- case NEON::BI__builtin_neon_vget_lane_bf16:
- case NEON::BI__builtin_neon_vget_lane_i32:
- case NEON::BI__builtin_neon_vget_lane_i64:
- case NEON::BI__builtin_neon_vget_lane_mf8:
- case NEON::BI__builtin_neon_vget_lane_f32:
- case NEON::BI__builtin_neon_vgetq_lane_i8:
- case NEON::BI__builtin_neon_vgetq_lane_i16:
- case NEON::BI__builtin_neon_vgetq_lane_bf16:
- case NEON::BI__builtin_neon_vgetq_lane_i32:
- case NEON::BI__builtin_neon_vgetq_lane_i64:
- case NEON::BI__builtin_neon_vgetq_lane_mf8:
- case NEON::BI__builtin_neon_vgetq_lane_f32:
- case NEON::BI__builtin_neon_vduph_lane_bf16:
- case NEON::BI__builtin_neon_vduph_laneq_bf16:
- case NEON::BI__builtin_neon_vset_lane_i8:
- case NEON::BI__builtin_neon_vset_lane_mf8:
- case NEON::BI__builtin_neon_vset_lane_i16:
- case NEON::BI__builtin_neon_vset_lane_bf16:
- case NEON::BI__builtin_neon_vset_lane_i32:
- case NEON::BI__builtin_neon_vset_lane_i64:
- case NEON::BI__builtin_neon_vset_lane_f32:
- case NEON::BI__builtin_neon_vsetq_lane_i8:
- case NEON::BI__builtin_neon_vsetq_lane_mf8:
- case NEON::BI__builtin_neon_vsetq_lane_i16:
- case NEON::BI__builtin_neon_vsetq_lane_bf16:
- case NEON::BI__builtin_neon_vsetq_lane_i32:
- case NEON::BI__builtin_neon_vsetq_lane_i64:
- case NEON::BI__builtin_neon_vsetq_lane_f32:
- case NEON::BI__builtin_neon_vsha1h_u32:
- case NEON::BI__builtin_neon_vsha1cq_u32:
- case NEON::BI__builtin_neon_vsha1pq_u32:
- case NEON::BI__builtin_neon_vsha1mq_u32:
- case NEON::BI__builtin_neon_vcvth_bf16_f32:
- case clang::ARM::BI_MoveToCoprocessor:
- case clang::ARM::BI_MoveToCoprocessor2:
- return false;
}
- return true;
+
+ if (mask)
+ return true;
+
+ return false;
}
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
@@ -5871,6 +5858,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
if (It != end(NEONEquivalentIntrinsicMap))
BuiltinID = It->second;
+ // Check whether this is an SISD builtin.
+ auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
+ const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
+ SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
+ bool IsSISD = (Builtin != nullptr);
+
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -5880,7 +5873,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::SmallVector<Value*, 4> Ops;
Address PtrOp0 = Address::invalid();
- for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
+ // Note the assumption that SISD intrinsics do not contain extra arguments.
+ // TODO: Fold this into a single function call instead of, effectively, two
+ // separate checks.
+ bool HasExtraArg = !IsSISD && HasExtraNeonArgument(BuiltinID);
+ unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
+ for (unsigned i = 0, e = NumArgs; i != e; i++) {
if (i == 0) {
switch (BuiltinID) {
case NEON::BI__builtin_neon_vld1_v:
@@ -5907,12 +5905,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));
}
- auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);
- const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(
- SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
-
if (Builtin) {
- Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
assert(Result && "SISD intrinsic should have been handled");
return Result;
@@ -5947,7 +5940,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
switch (BuiltinID) {
default: break;
case NEON::BI__builtin_neon_vabsh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
case NEON::BI__builtin_neon_vaddq_p128: {
llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);
@@ -5974,7 +5966,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
[[fallthrough]];
case NEON::BI__builtin_neon_vcvts_f32_s32:
case NEON::BI__builtin_neon_vcvtd_f64_s64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
@@ -5991,7 +5982,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvth_f16_s16:
case NEON::BI__builtin_neon_vcvth_f16_s32:
case NEON::BI__builtin_neon_vcvth_f16_s64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
llvm::Type *FTy = HalfTy;
llvm::Type *InTy;
if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
@@ -6018,7 +6008,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type *InTy = Int16Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcvtah_u16_f16:
@@ -6051,7 +6040,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcageh_f16:
@@ -6071,7 +6059,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
llvm::Type *Tys[2] = {InTy, FTy};
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcvth_n_s16_f16:
@@ -6087,7 +6074,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
llvm::Type* FTy = HalfTy;
llvm::Type* InTy = Int32Ty;
llvm::Type *Tys[2] = {FTy, InTy};
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
switch (BuiltinID) {
default: llvm_unreachable("missing builtin ID in switch!");
case NEON::BI__builtin_neon_vcvth_n_f16_s16:
@@ -6102,91 +6088,81 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
}
case NEON::BI__builtin_neon_vpaddd_s64: {
+ // TODO: Isn't this handled by
+ // EmitCommonNeonSISDBuiltinExpr?
auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
- Value *Vec = EmitScalarExpr(E->getArg(0));
// The vector is v2f64, so make sure it's bitcast to that.
- Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2i64");
llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
- Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
- Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
+ Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
+ Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
// Pairwise addition of a v2f64 into a scalar f64.
return Builder.CreateAdd(Op0, Op1, "vpaddd");
}
case NEON::BI__builtin_neon_vpaddd_f64: {
auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
- Value *Vec = EmitScalarExpr(E->getArg(0));
// The vector is v2f64, so make sure it's bitcast to that.
- Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f64");
llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
- Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
- Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
+ Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
+ Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
// Pairwise addition of a v2f64 into a scalar f64.
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
}
case NEON::BI__builtin_neon_vpadds_f32: {
auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
- Value *Vec = EmitScalarExpr(E->getArg(0));
// The vector is v2f32, so make sure it's bitcast to that.
- Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty, "v2f32");
llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
- Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
- Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
+ Value *Op0 = Builder.CreateExtractElement(Ops[0], Idx0, "lane0");
+ Value *Op1 = Builder.CreateExtractElement(Ops[0], Idx1, "lane1");
// Pairwise addition of a v2f32 into a scalar f32.
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
}
case NEON::BI__builtin_neon_vceqzd_s64:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::ICMP_EQ, "vceqz");
case NEON::BI__builtin_neon_vceqzd_f64:
case NEON::BI__builtin_neon_vceqzs_f32:
case NEON::BI__builtin_neon_vceqzh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::FCMP_OEQ, "vceqz");
case NEON::BI__builtin_neon_vcgezd_s64:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::ICMP_SGE, "vcgez");
case NEON::BI__builtin_neon_vcgezd_f64:
case NEON::BI__builtin_neon_vcgezs_f32:
case NEON::BI__builtin_neon_vcgezh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::FCMP_OGE, "vcgez");
case NEON::BI__builtin_neon_vclezd_s64:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::ICMP_SLE, "vclez");
case NEON::BI__builtin_neon_vclezd_f64:
case NEON::BI__builtin_neon_vclezs_f32:
case NEON::BI__builtin_neon_vclezh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::FCMP_OLE, "vclez");
case NEON::BI__builtin_neon_vcgtzd_s64:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::ICMP_SGT, "vcgtz");
case NEON::BI__builtin_neon_vcgtzd_f64:
case NEON::BI__builtin_neon_vcgtzs_f32:
case NEON::BI__builtin_neon_vcgtzh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::FCMP_OGT, "vcgtz");
case NEON::BI__builtin_neon_vcltzd_s64:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::ICMP_SLT, "vcltz");
@@ -6194,13 +6170,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcltzd_f64:
case NEON::BI__builtin_neon_vcltzs_f32:
case NEON::BI__builtin_neon_vcltzh_f16:
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
return EmitAArch64CompareBuiltinExpr(
Ops[0], ConvertType(E->getCallReturnType(getContext())),
ICmpInst::FCMP_OLT, "vcltz");
case NEON::BI__builtin_neon_vceqzd_u64: {
- Ops.push_back(EmitScalarExpr(E->getArg(0)));
Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
Ops[0] =
Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
@@ -6220,7 +6194,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
}
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
if (P == llvm::FCmpInst::FCMP_OEQ)
@@ -6474,7 +6447,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vqdmlslh_s16: {
SmallVector<Value *, 2> ProductOps;
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
- ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
+ ProductOps.push_back(vectorWrapScalar16(Ops[2]));
auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
ProductOps, "vqdmlXl");
@@ -6484,10 +6457,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
? Intrinsic::aarch64_neon_sqadd
: Intrinsic::aarch64_neon_sqsub;
+ // Drop the 2nd multiplication argument before the accumulation
+ Ops.pop_back();
return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
}
case NEON::BI__builtin_neon_vqshlud_n_s64: {
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
Ops, "vqshlu_n");
@@ -6497,7 +6471,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
? Intrinsic::aarch64_neon_uqshl
: Intrinsic::aarch64_neon_sqshl;
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
}
@@ -6506,7 +6479,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
? Intrinsic::aarch64_neon_urshl
: Intrinsic::aarch64_neon_srshl;
- Ops.push_back(EmitScalarExpr(E->getArg(1)));
int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
Ops[1] = ConstantInt::get(Int64Ty, -SV);
return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
@@ -6517,7 +6489,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
? Intrinsic::aarch64_neon_urshl
: Intrinsic::aarch64_neon_srshl;
Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
- Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
+ Ops[2] = Builder.CreateNeg(Ops[2]);
Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
{Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
@@ -6567,8 +6539,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
- Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
- "lane");
+ Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
SmallVector<Value *, 2> ProductOps;
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
ProductOps.push_back(vectorWrapScalar16(Ops[2]));
@@ -6577,7 +6548,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
ProductOps, "vqdmlXl");
Constant *CI = ConstantInt::get(SizeTy, 0);
Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
- Ops.pop_back();
+ // Drop lane-selection and the corresponding vector argument (these have
+ // already been used)
+ Ops.pop_back_n(2);
unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
@@ -6597,21 +6570,24 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
? Intrinsic::aarch64_neon_sqadd
: Intrinsic::aarch64_neon_sqsub;
+ // Drop the 2nd multiplication argument before the accumulation
+ Ops.pop_back();
return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
}
case NEON::BI__builtin_neon_vqdmlals_lane_s32:
case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
- Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
- "lane");
+ Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "lane");
SmallVector<Value *, 2> ProductOps;
ProductOps.push_back(Ops[1]);
ProductOps.push_back(Ops[2]);
Ops[1] =
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
ProductOps, "vqdmlXl");
- Ops.pop_back();
+ // Drop lane-selection and the corresponding vector argument (these have
+ // already been used)
+ Ops.pop_back_n(2);
unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
@@ -6670,7 +6646,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
ca...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/181974
More information about the cfe-commits
mailing list