r193692 - [AArch64] Add support for NEON scalar floating-point compare instructions.
Chad Rosier
mcrosier at codeaurora.org
Wed Oct 30 08:20:08 PDT 2013
Author: mcrosier
Date: Wed Oct 30 10:20:07 2013
New Revision: 193692
URL: http://llvm.org/viewvc/llvm-project?rev=193692&view=rev
Log:
[AArch64] Add support for NEON scalar floating-point compare instructions.
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
cfe/trunk/utils/TableGen/NeonEmitter.cpp
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=193692&r1=193691&r2=193692&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Wed Oct 30 10:20:07 2013
@@ -169,6 +169,7 @@ class NoTestOpInst<string n, string p, s
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// a: scalar of element type (splat to vector type)
+// b: scalar of unsigned integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
@@ -857,6 +858,29 @@ def SCALAR_CMHI : SInst<"vcgt", "sss", "
def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">;
////////////////////////////////////////////////////////////////////////////////
+// Scalar Floating-point Comparison
+def SCALAR_FCMEQ : IInst<"vceq", "bss", "SfSd">;
+def SCALAR_FCMEQZ : IInst<"vceqz", "bs", "SfSd">;
+def SCALAR_FCMGE : IInst<"vcge", "bss", "SfSd">;
+def SCALAR_FCMGEZ : IInst<"vcgez", "bs", "SfSd">;
+def SCALAR_FCMGT : IInst<"vcgt", "bss", "SfSd">;
+def SCALAR_FCMGTZ : IInst<"vcgtz", "bs", "SfSd">;
+def SCALAR_FCMLE : IInst<"vcle", "bss", "SfSd">;
+def SCALAR_FCMLEZ : IInst<"vclez", "bs", "SfSd">;
+def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">;
+def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">;
+def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">;
+
+////////////////////////////////////////////////////////////////////////////////
+// Scalar Floating-point Absolute Compare Mask Greater Than
+def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">;
+def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">;
+
+////////////////////////////////////////////////////////////////////////////////
// Scalar Absolute Value
def SCALAR_ABS : SInst<"vabs", "ss", "Sl">;
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=193692&r1=193691&r2=193692&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Oct 30 10:20:07 2013
@@ -1755,6 +1755,7 @@ static Value *EmitAArch64ScalarBuiltinEx
// Extend element of one-element vector
bool ExtendEle = false;
bool OverloadInt = false;
+ bool OverloadCmpInt = false;
bool OverloadWideInt = false;
bool OverloadNarrowInt = false;
const char *s = NULL;
@@ -2011,71 +2012,151 @@ static Value *EmitAArch64ScalarBuiltinEx
case AArch64::BI__builtin_neon_vceqd_s64:
case AArch64::BI__builtin_neon_vceqd_u64:
Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Equal To Zero
case AArch64::BI__builtin_neon_vceqzd_s64:
case AArch64::BI__builtin_neon_vceqzd_u64:
Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Greater Than or Equal
case AArch64::BI__builtin_neon_vcged_s64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
case AArch64::BI__builtin_neon_vcged_u64:
Int = Intrinsic::aarch64_neon_vchs; s = "vcge";
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Greater Than or Equal To Zero
case AArch64::BI__builtin_neon_vcgezd_s64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Greater Than
case AArch64::BI__builtin_neon_vcgtd_s64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
case AArch64::BI__builtin_neon_vcgtd_u64:
Int = Intrinsic::aarch64_neon_vchi; s = "vcgt";
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Greater Than Zero
case AArch64::BI__builtin_neon_vcgtzd_s64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Less Than or Equal
case AArch64::BI__builtin_neon_vcled_s64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
- OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
case AArch64::BI__builtin_neon_vcled_u64:
Int = Intrinsic::aarch64_neon_vchs; s = "vchs";
- OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Compare Less Than or Equal To Zero
case AArch64::BI__builtin_neon_vclezd_s64:
Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Compare Less Than
case AArch64::BI__builtin_neon_vcltd_s64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
- OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
case AArch64::BI__builtin_neon_vcltd_u64:
Int = Intrinsic::aarch64_neon_vchi; s = "vchi";
- OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Compare Less Than Zero
case AArch64::BI__builtin_neon_vcltzd_s64:
Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Equal
+ case AArch64::BI__builtin_neon_vceqs_f32:
+ case AArch64::BI__builtin_neon_vceqd_f64:
+ Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Equal To Zero
+ case AArch64::BI__builtin_neon_vceqzs_f32:
+ case AArch64::BI__builtin_neon_vceqzd_f64:
+ Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Greater Than Or Equal
+ case AArch64::BI__builtin_neon_vcges_f32:
+ case AArch64::BI__builtin_neon_vcged_f64:
+ Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Greater Than Or Equal To Zero
+ case AArch64::BI__builtin_neon_vcgezs_f32:
+ case AArch64::BI__builtin_neon_vcgezd_f64:
+ Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Greather Than
+ case AArch64::BI__builtin_neon_vcgts_f32:
+ case AArch64::BI__builtin_neon_vcgtd_f64:
+ Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Greather Than Zero
+ case AArch64::BI__builtin_neon_vcgtzs_f32:
+ case AArch64::BI__builtin_neon_vcgtzd_f64:
+ Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Less Than or Equal
+ case AArch64::BI__builtin_neon_vcles_f32:
+ case AArch64::BI__builtin_neon_vcled_f64:
+ Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Less Than Or Equal To Zero
+ case AArch64::BI__builtin_neon_vclezs_f32:
+ case AArch64::BI__builtin_neon_vclezd_f64:
+ Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Compare Less Than Zero
+ case AArch64::BI__builtin_neon_vclts_f32:
+ case AArch64::BI__builtin_neon_vcltd_f64:
+ Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
+ // Scalar Floating-point Compare Less Than Zero
+ case AArch64::BI__builtin_neon_vcltzs_f32:
+ case AArch64::BI__builtin_neon_vcltzd_f64:
+ Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
+ // Add implicit zero operand.
+ Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Absolute Compare Greater Than Or Equal
+ case AArch64::BI__builtin_neon_vcages_f32:
+ case AArch64::BI__builtin_neon_vcaged_f64:
+ Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Absolute Compare Greater Than
+ case AArch64::BI__builtin_neon_vcagts_f32:
+ case AArch64::BI__builtin_neon_vcagtd_f64:
+ Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt";
+ OverloadCmpInt = true; break;
+ // Scalar Floating-point Absolute Compare Less Than Or Equal
+ case AArch64::BI__builtin_neon_vcales_f32:
+ case AArch64::BI__builtin_neon_vcaled_f64:
+ Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
+ // Scalar Floating-point Absolute Compare Less Than
+ case AArch64::BI__builtin_neon_vcalts_f32:
+ case AArch64::BI__builtin_neon_vcaltd_f64:
+ Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt";
+ OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Compare Bitwise Test Bits
case AArch64::BI__builtin_neon_vtstd_s64:
case AArch64::BI__builtin_neon_vtstd_u64:
Int = Intrinsic::aarch64_neon_vtstd; s = "vtst";
- OverloadInt = false; break;
+ OverloadCmpInt = true; break;
// Scalar Absolute Value
case AArch64::BI__builtin_neon_vabsd_s64:
Int = Intrinsic::aarch64_neon_vabs;
@@ -2187,6 +2268,19 @@ static Value *EmitAArch64ScalarBuiltinEx
llvm::VectorType::getExtendedElementVectorType(VTy) :
llvm::VectorType::getTruncatedElementVectorType(VTy);
F = CGF.CGM.getIntrinsic(Int, RTy);
+ } else if (OverloadCmpInt) {
+ // Determine the types of this overloaded AArch64 intrinsic
+ SmallVector<llvm::Type *, 3> Tys;
+ const Expr *Arg = E->getArg(E->getNumArgs()-1);
+ llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
+ llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
+ Tys.push_back(VTy);
+ Ty = CGF.ConvertType(Arg->getType());
+ VTy = llvm::VectorType::get(Ty, 1);
+ Tys.push_back(VTy);
+ Tys.push_back(VTy);
+
+ F = CGF.CGM.getIntrinsic(Int, Tys);
} else
F = CGF.CGM.getIntrinsic(Int);
Modified: cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c?rev=193692&r1=193691&r2=193692&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-intrinsics.c Wed Oct 30 10:20:07 2013
@@ -7315,3 +7315,172 @@ int32_t test_vqmovnd_u64(int64_t a) {
// CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
return (int32_t)vqmovnd_u64(a);
}
+
+uint32_t test_vceqs_f32(float32_t a, float32_t b) {
+// CHECK: test_vceqs_f32
+// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vceqs_f32(a, b);
+}
+
+uint64_t test_vceqd_f64(float64_t a, float64_t b) {
+// CHECK: test_vceqd_f64
+// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vceqd_f64(a, b);
+}
+
+uint32_t test_vceqzs_f32(float32_t a) {
+// CHECK: test_vceqzs_f32
+// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+ return (uint32_t)vceqzs_f32(a);
+}
+
+uint64_t test_vceqzd_f64(float64_t a) {
+// CHECK: test_vceqzd_f64
+// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+ return (uint64_t)vceqzd_f64(a);
+}
+
+uint32_t test_vcges_f32(float32_t a, float32_t b) {
+// CHECK: test_vcges_f32
+// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcges_f32(a, b);
+}
+
+uint64_t test_vcged_f64(float64_t a, float64_t b) {
+// CHECK: test_vcged_f64
+// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcged_f64(a, b);
+}
+
+uint32_t test_vcgezs_f32(float32_t a) {
+// CHECK: test_vcgezs_f32
+// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+ return (uint32_t)vcgezs_f32(a);
+}
+
+uint64_t test_vcgezd_f64(float64_t a) {
+// CHECK: test_vcgezd_f64
+// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+ return (uint64_t)vcgezd_f64(a);
+}
+
+uint32_t test_vcgts_f32(float32_t a, float32_t b) {
+// CHECK: test_vcgts_f32
+// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcgts_f32(a, b);
+}
+
+uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
+// CHECK: test_vcgtd_f64
+// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcgtd_f64(a, b);
+}
+
+uint32_t test_vcgtzs_f32(float32_t a) {
+// CHECK: test_vcgtzs_f32
+// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+ return (uint32_t)vcgtzs_f32(a);
+}
+
+uint64_t test_vcgtzd_f64(float64_t a) {
+// CHECK: test_vcgtzd_f64
+// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+ return (uint64_t)vcgtzd_f64(a);
+}
+
+uint32_t test_vcles_f32(float32_t a, float32_t b) {
+// CHECK: test_vcles_f32
+// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcles_f32(a, b);
+}
+
+uint64_t test_vcled_f64(float64_t a, float64_t b) {
+// CHECK: test_vcled_f64
+// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcled_f64(a, b);
+}
+
+uint32_t test_vclezs_f32(float32_t a) {
+// CHECK: test_vclezs_f32
+// CHECK: fcmle {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+ return (uint32_t)vclezs_f32(a);
+}
+
+uint64_t test_vclezd_f64(float64_t a) {
+// CHECK: test_vclezd_f64
+// CHECK: fcmle {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+ return (uint64_t)vclezd_f64(a);
+}
+
+uint32_t test_vclts_f32(float32_t a, float32_t b) {
+// CHECK: test_vclts_f32
+// CHECK: fcmgt {{s[0-9]+}}, s1, s0
+ return (uint32_t)vclts_f32(a, b);
+}
+
+uint64_t test_vcltd_f64(float64_t a, float64_t b) {
+// CHECK: test_vcltd_f64
+// CHECK: fcmgt {{d[0-9]+}}, d1, d0
+ return (uint64_t)vcltd_f64(a, b);
+}
+
+uint32_t test_vcltzs_f32(float32_t a) {
+// CHECK: test_vcltzs_f32
+// CHECK: fcmlt {{s[0-9]+}}, {{s[0-9]+}}, #0.0
+ return (uint32_t)vcltzs_f32(a);
+}
+
+uint64_t test_vcltzd_f64(float64_t a) {
+// CHECK: test_vcltzd_f64
+// CHECK: fcmlt {{d[0-9]+}}, {{d[0-9]+}}, #0.0
+ return (uint64_t)vcltzd_f64(a);
+}
+
+uint32_t test_vcages_f32(float32_t a, float32_t b) {
+// CHECK: test_vcages_f32
+// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcages_f32(a, b);
+}
+
+uint64_t test_vcaged_f64(float64_t a, float64_t b) {
+// CHECK: test_vcaged_f64
+// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcaged_f64(a, b);
+}
+
+uint32_t test_vcagts_f32(float32_t a, float32_t b) {
+// CHECK: test_vcagts_f32
+// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcagts_f32(a, b);
+}
+
+uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
+// CHECK: test_vcagtd_f64
+// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcagtd_f64(a, b);
+}
+
+uint32_t test_vcales_f32(float32_t a, float32_t b) {
+// CHECK: test_vcales_f32
+// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcales_f32(a, b);
+}
+
+uint64_t test_vcaled_f64(float64_t a, float64_t b) {
+// CHECK: test_vcaled_f64
+// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcaled_f64(a, b);
+}
+
+uint32_t test_vcalts_f32(float32_t a, float32_t b) {
+// CHECK: test_vcalts_f32
+// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ return (uint32_t)vcalts_f32(a, b);
+}
+
+uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
+// CHECK: test_vcaltd_f64
+// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ return (uint64_t)vcaltd_f64(a, b);
+}
+
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=193692&r1=193691&r2=193692&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Wed Oct 30 10:20:07 2013
@@ -442,6 +442,8 @@ static char ModType(const char mod, char
usgn = true;
}
break;
+ case 'b':
+ scal = true;
case 'u':
usgn = true;
poly = false;
More information about the cfe-commits
mailing list