[PATCH] [AArch64 NEON] Implement intrinsic vceqz_f64 -clang
Kevin Qin
kevinqindev at gmail.com
Wed Dec 4 00:24:59 PST 2013
Committed as r196361.
2013/12/3 Kevin Qin <kevinqindev at gmail.com>
> http://llvm-reviews.chandlerc.com/D2315
>
> Files:
> include/clang/Basic/arm_neon.td
> lib/CodeGen/CGBuiltin.cpp
> test/CodeGen/aarch64-neon-misc.c
>
> Index: include/clang/Basic/arm_neon.td
> ===================================================================
> --- include/clang/Basic/arm_neon.td
> +++ include/clang/Basic/arm_neon.td
> @@ -748,7 +748,7 @@
> def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd",
> OP_LT>;
>
> def CMEQ : SInst<"vceqz", "ud",
> - "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsQd">;
> + "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQd">;
> def CMGE : SInst<"vcgez", "ud", "csifdQcQsQiQlQfQd">;
> def CMLE : SInst<"vclez", "ud", "csifdQcQsQiQlQfQd">;
> def CMGT : SInst<"vcgtz", "ud", "csifdQcQsQiQlQfQd">;
> Index: lib/CodeGen/CGBuiltin.cpp
> ===================================================================
> --- lib/CodeGen/CGBuiltin.cpp
> +++ lib/CodeGen/CGBuiltin.cpp
> @@ -1759,6 +1759,7 @@
> bool ExtendEle = false;
> bool OverloadInt = false;
> bool OverloadCmpInt = false;
> + bool IsFpCmpZInt = false;
> bool OverloadCvtInt = false;
> bool OverloadWideInt = false;
> bool OverloadNarrowInt = false;
> @@ -2267,7 +2268,8 @@
> case AArch64::BI__builtin_neon_vceqzd_f64:
> Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
> // Add implicit zero operand.
> - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> + Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> + IsFpCmpZInt = true;
> OverloadCmpInt = true; break;
> // Scalar Floating-point Compare Greater Than Or Equal
> case AArch64::BI__builtin_neon_vcges_f32:
> @@ -2279,7 +2281,8 @@
> case AArch64::BI__builtin_neon_vcgezd_f64:
> Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
> // Add implicit zero operand.
> - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> + Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> + IsFpCmpZInt = true;
> OverloadCmpInt = true; break;
> // Scalar Floating-point Compare Greather Than
> case AArch64::BI__builtin_neon_vcgts_f32:
> @@ -2291,7 +2294,8 @@
> case AArch64::BI__builtin_neon_vcgtzd_f64:
> Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
> // Add implicit zero operand.
> - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> + Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> + IsFpCmpZInt = true;
> OverloadCmpInt = true; break;
> // Scalar Floating-point Compare Less Than or Equal
> case AArch64::BI__builtin_neon_vcles_f32:
> @@ -2303,7 +2307,8 @@
> case AArch64::BI__builtin_neon_vclezd_f64:
> Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
> // Add implicit zero operand.
> - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> + Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> + IsFpCmpZInt = true;
> OverloadCmpInt = true; break;
> // Scalar Floating-point Compare Less Than Zero
> case AArch64::BI__builtin_neon_vclts_f32:
> @@ -2315,7 +2320,8 @@
> case AArch64::BI__builtin_neon_vcltzd_f64:
> Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
> // Add implicit zero operand.
> - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> + Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> + IsFpCmpZInt = true;
> OverloadCmpInt = true; break;
> // Scalar Floating-point Absolute Compare Greater Than Or Equal
> case AArch64::BI__builtin_neon_vcages_f32:
> @@ -2600,6 +2606,8 @@
> Ty = CGF.ConvertType(Arg->getType());
> VTy = llvm::VectorType::get(Ty, 1);
> Tys.push_back(VTy);
> + if(IsFpCmpZInt)
> + VTy = llvm::VectorType::get(CGF.FloatTy, 1);
> Tys.push_back(VTy);
>
> F = CGF.CGM.getIntrinsic(Int, Tys);
> Index: test/CodeGen/aarch64-neon-misc.c
> ===================================================================
> --- test/CodeGen/aarch64-neon-misc.c
> +++ test/CodeGen/aarch64-neon-misc.c
> @@ -96,6 +96,12 @@
> return vceqz_f32(a);
> }
>
> +// CHECK: test_vceqz_f64
> +// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0
> +uint64x1_t test_vceqz_f64(float64x1_t a) {
> + return vceqz_f64(a);
> +}
> +
> // CHECK: test_vceqzq_f32
> // CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
> uint32x4_t test_vceqzq_f32(float32x4_t a) {
>
--
Best Regards,
Kevin Qin
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131204/2ad24c01/attachment.html>
More information about the llvm-commits mailing list