[PATCH] [AArch64 NEON] Implement intrinsic vceqz_f64 -clang

Wed Dec 4 00:24:59 PST 2013

Committed as r196361.


2013/12/3 Kevin Qin <kevinqindev at gmail.com>

> http://llvm-reviews.chandlerc.com/D2315
>
> Files:
>   include/clang/Basic/arm_neon.td
>   lib/CodeGen/CGBuiltin.cpp
>   test/CodeGen/aarch64-neon-misc.c
>
> Index: include/clang/Basic/arm_neon.td
> ===================================================================
> --- include/clang/Basic/arm_neon.td
> +++ include/clang/Basic/arm_neon.td
> @@ -748,7 +748,7 @@
>  def CFMLT  : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>;
>
>  def CMEQ  : SInst<"vceqz", "ud",
> -                  "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsQd">;
> +                  "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQd">;
>  def CMGE  : SInst<"vcgez", "ud", "csifdQcQsQiQlQfQd">;
>  def CMLE  : SInst<"vclez", "ud", "csifdQcQsQiQlQfQd">;
>  def CMGT  : SInst<"vcgtz", "ud", "csifdQcQsQiQlQfQd">;
> Index: lib/CodeGen/CGBuiltin.cpp
> ===================================================================
> --- lib/CodeGen/CGBuiltin.cpp
> +++ lib/CodeGen/CGBuiltin.cpp
> @@ -1759,6 +1759,7 @@
>    bool ExtendEle = false;
>    bool OverloadInt = false;
>    bool OverloadCmpInt = false;
> +  bool IsFpCmpZInt = false;
>    bool OverloadCvtInt = false;
>    bool OverloadWideInt = false;
>    bool OverloadNarrowInt = false;
> @@ -2267,7 +2268,8 @@
>    case AArch64::BI__builtin_neon_vceqzd_f64:
>      Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
>      // Add implicit zero operand.
> -    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> +    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> +    IsFpCmpZInt = true;
>      OverloadCmpInt = true; break;
>    // Scalar Floating-point Compare Greater Than Or Equal
>    case AArch64::BI__builtin_neon_vcges_f32:
> @@ -2279,7 +2281,8 @@
>    case AArch64::BI__builtin_neon_vcgezd_f64:
>      Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
>      // Add implicit zero operand.
> -    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> +    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> +    IsFpCmpZInt = true;
>      OverloadCmpInt = true; break;
>    // Scalar Floating-point Compare Greather Than
>    case AArch64::BI__builtin_neon_vcgts_f32:
> @@ -2291,7 +2294,8 @@
>    case AArch64::BI__builtin_neon_vcgtzd_f64:
>      Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
>      // Add implicit zero operand.
> -    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> +    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> +    IsFpCmpZInt = true;
>      OverloadCmpInt = true; break;
>    // Scalar Floating-point Compare Less Than or Equal
>    case AArch64::BI__builtin_neon_vcles_f32:
> @@ -2303,7 +2307,8 @@
>    case AArch64::BI__builtin_neon_vclezd_f64:
>      Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
>      // Add implicit zero operand.
> -    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> +    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> +    IsFpCmpZInt = true;
>      OverloadCmpInt = true; break;
>    // Scalar Floating-point Compare Less Than Zero
>    case AArch64::BI__builtin_neon_vclts_f32:
> @@ -2315,7 +2320,8 @@
>    case AArch64::BI__builtin_neon_vcltzd_f64:
>      Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
>      // Add implicit zero operand.
> -    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
> +    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
> +    IsFpCmpZInt = true;
>      OverloadCmpInt = true; break;
>    // Scalar Floating-point Absolute Compare Greater Than Or Equal
>    case AArch64::BI__builtin_neon_vcages_f32:
> @@ -2600,6 +2606,8 @@
>      Ty = CGF.ConvertType(Arg->getType());
>      VTy = llvm::VectorType::get(Ty, 1);
>      Tys.push_back(VTy);
> +    if(IsFpCmpZInt)
> +      VTy = llvm::VectorType::get(CGF.FloatTy, 1);
>      Tys.push_back(VTy);
>
>      F = CGF.CGM.getIntrinsic(Int, Tys);
> Index: test/CodeGen/aarch64-neon-misc.c
> ===================================================================
> --- test/CodeGen/aarch64-neon-misc.c
> +++ test/CodeGen/aarch64-neon-misc.c
> @@ -96,6 +96,12 @@
>    return vceqz_f32(a);
>  }
>
> +// CHECK: test_vceqz_f64
> +// CHECK: fcmeq  {{d[0-9]+}}, {{d[0-9]+}}, #0
> +uint64x1_t test_vceqz_f64(float64x1_t a) {
> +  return vceqz_f64(a);
> +}
> +
>  // CHECK: test_vceqzq_f32
>  // CHECK: fcmeq  {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
>  uint32x4_t test_vceqzq_f32(float32x4_t a) {
>


-- 
Best Regards,

Kevin Qin
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131204/2ad24c01/attachment.html>