[PATCH] [AArch64 NEON] Implement intrinsic vceqz_f64 -clang

Tue Dec 3 01:57:47 PST 2013

http://llvm-reviews.chandlerc.com/D2315

Files:
  include/clang/Basic/arm_neon.td
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/aarch64-neon-misc.c

Index: include/clang/Basic/arm_neon.td
===================================================================

--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -748,7 +748,7 @@
 def CFMLT  : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>;
 
 def CMEQ  : SInst<"vceqz", "ud",
-                  "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsQd">;
+                  "csifUcUsUiPcPsQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQd">;
 def CMGE  : SInst<"vcgez", "ud", "csifdQcQsQiQlQfQd">;
 def CMLE  : SInst<"vclez", "ud", "csifdQcQsQiQlQfQd">;
 def CMGT  : SInst<"vcgtz", "ud", "csifdQcQsQiQlQfQd">;
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -1759,6 +1759,7 @@
   bool ExtendEle = false;
   bool OverloadInt = false;
   bool OverloadCmpInt = false;
+  bool IsFpCmpZInt = false;
   bool OverloadCvtInt = false;
   bool OverloadWideInt = false;
   bool OverloadNarrowInt = false;
@@ -2267,7 +2268,8 @@
   case AArch64::BI__builtin_neon_vceqzd_f64:
     Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
     // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
+    IsFpCmpZInt = true;
     OverloadCmpInt = true; break;
   // Scalar Floating-point Compare Greater Than Or Equal
   case AArch64::BI__builtin_neon_vcges_f32:
@@ -2279,7 +2281,8 @@
   case AArch64::BI__builtin_neon_vcgezd_f64:
     Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
     // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
+    IsFpCmpZInt = true;
     OverloadCmpInt = true; break;
   // Scalar Floating-point Compare Greather Than
   case AArch64::BI__builtin_neon_vcgts_f32:
@@ -2291,7 +2294,8 @@
   case AArch64::BI__builtin_neon_vcgtzd_f64:
     Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
     // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
+    IsFpCmpZInt = true;
     OverloadCmpInt = true; break;
   // Scalar Floating-point Compare Less Than or Equal
   case AArch64::BI__builtin_neon_vcles_f32:
@@ -2303,7 +2307,8 @@
   case AArch64::BI__builtin_neon_vclezd_f64:
     Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
     // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
+    IsFpCmpZInt = true;
     OverloadCmpInt = true; break;
   // Scalar Floating-point Compare Less Than Zero
   case AArch64::BI__builtin_neon_vclts_f32:
@@ -2315,7 +2320,8 @@
   case AArch64::BI__builtin_neon_vcltzd_f64:
     Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
     // Add implicit zero operand.
-    Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+    Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy));
+    IsFpCmpZInt = true;
     OverloadCmpInt = true; break;
   // Scalar Floating-point Absolute Compare Greater Than Or Equal
   case AArch64::BI__builtin_neon_vcages_f32:
@@ -2600,6 +2606,8 @@
     Ty = CGF.ConvertType(Arg->getType());
     VTy = llvm::VectorType::get(Ty, 1);
     Tys.push_back(VTy);
+    if(IsFpCmpZInt)
+      VTy = llvm::VectorType::get(CGF.FloatTy, 1);
     Tys.push_back(VTy);
 
     F = CGF.CGM.getIntrinsic(Int, Tys);
Index: test/CodeGen/aarch64-neon-misc.c
===================================================================
--- test/CodeGen/aarch64-neon-misc.c
+++ test/CodeGen/aarch64-neon-misc.c
@@ -96,6 +96,12 @@
   return vceqz_f32(a);
 }
 
+// CHECK: test_vceqz_f64
+// CHECK: fcmeq  {{d[0-9]+}}, {{d[0-9]+}}, #0
+uint64x1_t test_vceqz_f64(float64x1_t a) {
+  return vceqz_f64(a);
+}
+
 // CHECK: test_vceqzq_f32
 // CHECK: fcmeq  {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0
 uint32x4_t test_vceqzq_f32(float32x4_t a) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2315.1.patch
Type: text/x-patch
Size: 4045 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131203/b028a298/attachment.bin>