r205303 - ARM64: add a few bits of polynomial intrinsic codegen.

Tim Northover tnorthover at apple.com
Tue Apr 1 05:23:08 PDT 2014


Author: tnorthover
Date: Tue Apr  1 07:23:08 2014
New Revision: 205303

URL: http://llvm.org/viewvc/llvm-project?rev=205303&view=rev
Log:
ARM64: add a few bits of polynomial intrinsic codegen.

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/aarch64-poly128.c
    cfe/trunk/test/CodeGen/aarch64-poly64.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=205303&r1=205302&r2=205303&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Apr  1 07:23:08 2014
@@ -2608,6 +2608,7 @@ static NeonIntrinsicInfo ARM64SISDIntrin
   NEONMAP1(vminvq_f64, arm64_neon_fminv, AddRetType | Add1ArgType),
   NEONMAP1(vminvq_s32, arm64_neon_sminv, AddRetType | Add1ArgType),
   NEONMAP1(vminvq_u32, arm64_neon_uminv, AddRetType | Add1ArgType),
+  NEONMAP1(vmull_p64, arm64_neon_pmull64, 0),
   NEONMAP1(vmulxd_f64, arm64_neon_fmulx, Add1ArgType),
   NEONMAP1(vmulxs_f32, arm64_neon_fmulx, Add1ArgType),
   NEONMAP1(vpaddd_s64, arm64_neon_uaddv, AddRetType | Add1ArgType),
@@ -5190,6 +5191,16 @@ Value *CodeGenFunction::EmitARM64Builtin
   // Handle non-overloaded intrinsics first.
   switch (BuiltinID) {
   default: break;
+  case NEON::BI__builtin_neon_vldrq_p128: {
+    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
+    Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
+    return Builder.CreateLoad(Ptr);
+  }
+  case NEON::BI__builtin_neon_vstrq_p128: {
+    llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
+    Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
+    return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr);
+  }
   case NEON::BI__builtin_neon_vcvts_u32_f32:
   case NEON::BI__builtin_neon_vcvtd_u64_f64:
     usgn = true;

Modified: cfe/trunk/test/CodeGen/aarch64-poly128.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-poly128.c?rev=205303&r1=205302&r2=205303&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-poly128.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-poly128.c Tue Apr  1 07:23:08 2014
@@ -1,6 +1,11 @@
 // REQUIRES: aarch64-registered-target
+// REQUIRES: arm64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+// RUN:  -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN:  --check-prefix=CHECK-AARCH64
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu \
+// RUN:  -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN:  --check-prefix=CHECK-ARM64
 
 // Test new aarch64 intrinsics with poly128
 // FIXME: Currently, poly128_t equals to uint128, which will be spilt into
@@ -14,22 +19,29 @@
 void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
   // CHECK-LABEL: test_vstrq_p128
   vstrq_p128(ptr, val);
-	// CHECK: str	{{x[0-9]+}}, [{{x[0-9]+}}, #8]
-	// CHECK-NEXT: str	 {{x[0-9]+}}, [{{x[0-9]+}}]
+// CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+// CHECK-AARCH64-NEXT: str  {{x[0-9]+}}, [{{x[0-9]+}}]
+
+  // CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 }
 
 poly128_t test_vldrq_p128(poly128_t * ptr) {
   // CHECK-LABEL: test_vldrq_p128
   return vldrq_p128(ptr);
-	// CHECK: ldr	{{x[0-9]+}}, [{{x[0-9]+}}]
-	// CHECK-NEXT: ldr	{{x[0-9]+}}, [{{x[0-9]+}}, #8]
+  // CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
+  // CHECK-AARCH64-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
+
+  // CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 }
 
 void test_ld_st_p128(poly128_t * ptr) {
   // CHECK-LABEL: test_ld_st_p128
    vstrq_p128(ptr+1, vldrq_p128(ptr));
-	// CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
-	// CHECK-NEXT: str	{{q[0-9]+}}, [{{x[0-9]+}}, #16]
+ // CHECK-AARCH64: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
+ // CHECK-AARCH64-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
+
+ // CHECK-ARM64: ldp [[PLO:x[0-9]+]], [[PHI:x[0-9]+]], [{{x[0-9]+}}]
+ // CHECK-ARM64-NEXT: stp [[PLO]], [[PHI]], [{{x[0-9]+}}, #16]
 }
 
 poly128_t test_vmull_p64(poly64_t a, poly64_t b) {

Modified: cfe/trunk/test/CodeGen/aarch64-poly64.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-poly64.c?rev=205303&r1=205302&r2=205303&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-poly64.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-poly64.c Tue Apr  1 07:23:08 2014
@@ -1,6 +1,11 @@
 // REQUIRES: aarch64-registered-target
+// REQUIRES: arm64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+// RUN:  -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN:  --check-prefix=CHECK-AARCH64
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu \
+// RUN:  -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
+// RUN:  --check-prefix=CHECK-ARM64
 
 // Test new aarch64 intrinsics with poly64
 
@@ -69,7 +74,9 @@ poly64x2_t test_vsetq_lane_p64(poly64_t
 poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
   // CHECK-LABEL: test_vcopy_lane_p64
   return vcopy_lane_p64(a, 0, b, 0);
-  // CHECK: fmov  {{d[0-9]+}}, {{d[0-9]+}}
+  // CHECK-AARCH64: fmov  {{d[0-9]+}}, {{d[0-9]+}}
+
+  // CHECK-ARM64: orr v0.16b, v1.16b, v1.16b
 }
 
 poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
@@ -81,7 +88,7 @@ poly64x2_t test_vcopyq_lane_p64(poly64x2
 poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vcopyq_laneq_p64
   return vcopyq_laneq_p64(a, 1, b, 1);
-  // CHECK: ins  {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins  {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
 }
 
 poly64x1_t test_vcreate_p64(uint64_t a) {
@@ -128,97 +135,101 @@ poly64x2_t test_vcombine_p64(poly64x1_t
 poly64x1_t test_vld1_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld1_p64
   return vld1_p64(ptr);
-  // CHECK:  ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64:  ld1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: ldr {{d[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld1q_p64
   return vld1q_p64(ptr);
-  // CHECK:  ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64:  ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: ldr {{q[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
   // CHECK-LABEL: test_vst1_p64
   return vst1_p64(ptr, val);
-  // CHECK:  st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64:  st1 {{{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: str {{d[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
   // CHECK-LABEL: test_vst1q_p64
   return vst1q_p64(ptr, val);
-  // CHECK:  st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-AARCH64:  st1 {{{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK-ARM64: str {{q[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld2_p64
   return vld2_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld2q_p64
   return vld2q_p64(ptr);
-  // CHECK: ld2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld2 {{{ *v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld3_p64
   return vld3_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld3q_p64
   return vld3q_p64(ptr);
-  // CHECK: ld3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld3 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld4_p64
   return vld4_p64(ptr);
-  // CHECK: ld1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld4q_p64
   return vld4q_p64(ptr);
-  // CHECK: ld4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK: ld4 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
   // CHECK-LABEL: test_vst2_p64
   return vst2_p64(ptr, val);
-  // CHECK:  st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK:  st1 {{{ *v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
   // CHECK-LABEL: test_vst2q_p64
   return vst2q_p64(ptr, val);
-  // CHECK:  st2 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK:  st2 {{{ *v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
   // CHECK-LABEL: test_vst3_p64
   return vst3_p64(ptr, val);
-  // CHECK:  st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK:  st1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
   // CHECK-LABEL: test_vst3q_p64
   return vst3q_p64(ptr, val);
-  // CHECK:  st3 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK:  st3 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
   // CHECK-LABEL: test_vst4_p64
   return vst4_p64(ptr, val);
-  // CHECK:  st1 {{{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d, {{v[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
+  // CHECK:  st1 {{{ *v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d, v[0-9]+.1d *}}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
   // CHECK-LABEL: test_vst4q_p64
   return vst4q_p64(ptr, val);
-  // CHECK:  st4 {{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
+  // CHECK:  st4 {{{ *v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d *}}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
@@ -230,43 +241,49 @@ poly64x1_t test_vext_p64(poly64x1_t a, p
 poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vextq_p64
   return vextq_p64(a, b, 1);
-  // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+  // CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{#0x8|#8}}
 }
 
 poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vzip1q_p64
   return vzip1q_p64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vzip2q_p64
   return vzip2q_u64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vuzp1q_p64
   return vuzp1q_p64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vuzp2q_p64
   return vuzp2q_u64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vtrn1q_p64
   return vtrn1q_p64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+  // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vtrn2q_p64
   return vtrn2q_u64(a, b);
-  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+  // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {





More information about the cfe-commits mailing list