[clang] [llvm] [AArch64] Improve bcvtn2 and remove aarch64_neon_bfcvt intrinsics (PR #120363)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 05:43:20 PST 2025
================
@@ -4064,31 +4072,59 @@ static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
Function *F, IRBuilder<> &Builder) {
- Intrinsic::ID NewID =
- StringSwitch<Intrinsic::ID>(Name)
- .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
- .Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
- .Default(Intrinsic::not_intrinsic);
- if (NewID == Intrinsic::not_intrinsic)
- llvm_unreachable("Unhandled Intrinsic!");
-
- SmallVector<Value *, 3> Args(CI->args());
-
- // The original intrinsics incorrectly used a predicate based on the smallest
- // element type rather than the largest.
- Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
- Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
-
- if (Args[1]->getType() != BadPredTy)
- llvm_unreachable("Unexpected predicate type!");
-
- Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
- BadPredTy, Args[1]);
- Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
- GoodPredTy, Args[1]);
-
- return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
- CI->getName());
+ if (Name.starts_with("neon.bfcvt")) {
+ if (Name.starts_with("neon.bfcvtn2")) {
+ SmallVector<int, 32> LoMask(4);
+ std::iota(LoMask.begin(), LoMask.end(), 0);
+ SmallVector<int, 32> ConcatMask(8);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
+ Value *Trunc =
+ Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
+ return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
+ } else if (Name.starts_with("neon.bfcvtn")) {
+ SmallVector<int, 32> ConcatMask(8);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ Type *V4BF16 =
+ FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
+ Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
+ dbgs() << "Trunc: " << *Trunc << "\n";
+ return Builder.CreateShuffleVector(
+ Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
+ } else {
+ return Builder.CreateFPTrunc(CI->getOperand(0),
+ Type::getBFloatTy(F->getContext()));
+ }
+ } else if (Name.starts_with("sve.fcvt")) {
----------------
davemgreen wrote:
The sve.fcvt case is what this function (upgradeAArch64IntrinsicCall) was previously handling, so this change is NFC (no functional change) in that regard. Execution reaches this branch because of these lines from upgradeArmOrAarch64IntrinsicFunction:
```
// 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
NewFn = nullptr;
```
The existing handling is now just wrapped inside an `if` to make it clearer. It has its own tests, which still pass.
https://github.com/llvm/llvm-project/pull/120363
More information about the llvm-commits
mailing list