r194042 - Implemented AArch64 NEON intrinsic vcopy_lane with float type.

Kevin Qin Kevin.Qin at arm.com
Mon Nov 4 18:05:44 PST 2013


Author: kevinqin
Date: Mon Nov  4 20:05:44 2013
New Revision: 194042

URL: http://llvm.org/viewvc/llvm-project?rev=194042&view=rev
Log:
Implemented AArch64 NEON intrinsic vcopy_lane with float type.

Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/aarch64-neon-copy.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194042&r1=194041&r2=194042&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Mon Nov  4 20:05:44 2013
@@ -98,7 +98,9 @@ def OP_DIV  : Op;
 def OP_LONG_HI : Op;
 def OP_NARROW_HI : Op;
 def OP_MOVL_HI : Op;
-def OP_COPY : Op;
+def OP_COPY_LN : Op;
+def OP_COPYQ_LN : Op;
+def OP_COPY_LNQ : Op;
 
 class Inst <string n, string p, string t, Op o> {
   string Name = n;
@@ -669,13 +671,18 @@ def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
 def GET_LANE : IInst<"vget_lane", "sdi",
-                        "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs">;
+                     "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
 def SET_LANE : IInst<"vset_lane", "dsdi",
-                 "csilPcPsUiUlUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPshfdQhQfQd">;
+                     "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
 def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
-                     "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs", OP_COPY>;
-def COPY_LANEQ : IOpInst<"vcopy_laneq", "ggidi",
-                     "QcQsQiQlQUcQUsQUiQUlQPcQPs", OP_COPY>;
+                        "csiPcPsUcUsUiPcPsf", OP_COPY_LN>;
+def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
+                        "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQd", OP_COPYQ_LN>;
+def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
+                     "csiPcPsUcUsUif", OP_COPY_LNQ>;
+def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi",
+                     "QcQsQiQlQUcQUsQUiQUlQPcQPsQfd", OP_COPY_LN>;
+
 
 ////////////////////////////////////////////////////////////////////////////////
 // Set all lanes to same value

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194042&r1=194041&r2=194042&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Nov  4 20:05:44 2013
@@ -2441,23 +2441,25 @@ Value *CodeGenFunction::EmitAArch64Built
   case AArch64::BI__builtin_neon_vget_lane_i16:
   case AArch64::BI__builtin_neon_vget_lane_i32:
   case AArch64::BI__builtin_neon_vget_lane_i64:
+  case AArch64::BI__builtin_neon_vget_lane_f32:
+  case AArch64::BI__builtin_neon_vget_lane_f64:
   case AArch64::BI__builtin_neon_vgetq_lane_i8:
   case AArch64::BI__builtin_neon_vgetq_lane_i16:
   case AArch64::BI__builtin_neon_vgetq_lane_i32:
   case AArch64::BI__builtin_neon_vgetq_lane_i64:
+  case AArch64::BI__builtin_neon_vgetq_lane_f32:
+  case AArch64::BI__builtin_neon_vgetq_lane_f64:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
   case AArch64::BI__builtin_neon_vset_lane_i8:
   case AArch64::BI__builtin_neon_vset_lane_i16:
   case AArch64::BI__builtin_neon_vset_lane_i32:
   case AArch64::BI__builtin_neon_vset_lane_i64:
-  case AArch64::BI__builtin_neon_vset_lane_f16:
   case AArch64::BI__builtin_neon_vset_lane_f32:
   case AArch64::BI__builtin_neon_vset_lane_f64:
   case AArch64::BI__builtin_neon_vsetq_lane_i8:
   case AArch64::BI__builtin_neon_vsetq_lane_i16:
   case AArch64::BI__builtin_neon_vsetq_lane_i32:
   case AArch64::BI__builtin_neon_vsetq_lane_i64:
-  case AArch64::BI__builtin_neon_vsetq_lane_f16:
   case AArch64::BI__builtin_neon_vsetq_lane_f32:
   case AArch64::BI__builtin_neon_vsetq_lane_f64:
     return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);

Modified: cfe/trunk/test/CodeGen/aarch64-neon-copy.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-copy.c?rev=194042&r1=194041&r2=194042&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-copy.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-copy.c Mon Nov  4 20:05:44 2013
@@ -281,16 +281,238 @@ int16x4_t test_vcopy_lane_s16(int16x4_t
   // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
 }
 
+poly8x8_t test_vcopy_lane_p8(poly8x8_t v1, poly8x8_t v2) {
+  // CHECK: test_vcopy_lane_p8
+  return vcopy_lane_p8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+poly16x4_t test_vcopy_lane_p16(poly16x4_t v1, poly16x4_t v2) {
+  // CHECK: test_vcopy_lane_p16
+  return vcopy_lane_p16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
 int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) {
   // CHECK: test_vcopy_lane_s32
   return vcopy_lane_s32(v1, 0, v2, 1);
   // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
 }
 
-int64x1_t test_vcopy_lane_s64(int64x1_t v1, int64x1_t v2) {
-  // CHECK: test_vcopy_lane_s64
-  return vcopy_lane_s64(v1, 0, v2, 0);
-  // CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
+float32x2_t test_vcopy_lane_f32(float32x2_t v1, float32x2_t v2) {
+  // CHECK: test_vcopy_lane_f32
+  return vcopy_lane_f32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint8x8_t test_vcopy_lane_u8(uint8x8_t v1, uint8x8_t v2) {
+  // CHECK: test_vcopy_lane_u8
+  return vcopy_lane_u8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+uint16x4_t test_vcopy_lane_u16(uint16x4_t v1, uint16x4_t v2) {
+  // CHECK: test_vcopy_lane_u16
+  return vcopy_lane_u16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+uint32x2_t test_vcopy_lane_u32(uint32x2_t v1, uint32x2_t v2) {
+  // CHECK: test_vcopy_lane_u32
+  return vcopy_lane_u32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int8x8_t test_vcopy_laneq_s8(int8x8_t v1, int8x16_t v2) {
+  // CHECK: test_vcopy_laneq_s8
+  return vcopy_laneq_s8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+int16x4_t test_vcopy_laneq_s16(int16x4_t v1, int16x8_t v2) {
+  // CHECK: test_vcopy_laneq_s16
+  return vcopy_laneq_s16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+poly8x8_t test_vcopy_laneq_p8(poly8x8_t v1, poly8x16_t v2) {
+  // CHECK: test_vcopy_laneq_p8
+  return vcopy_laneq_p8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+poly16x4_t test_vcopy_laneq_p16(poly16x4_t v1, poly16x8_t v2) {
+  // CHECK: test_vcopy_laneq_p16
+  return vcopy_laneq_p16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+int32x2_t test_vcopy_laneq_s32(int32x2_t v1, int32x4_t v2) {
+  // CHECK: test_vcopy_laneq_s32
+  return vcopy_laneq_s32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x2_t test_vcopy_laneq_f32(float32x2_t v1, float32x4_t v2) {
+  // CHECK: test_vcopy_laneq_f32
+  return vcopy_laneq_f32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint8x8_t test_vcopy_laneq_u8(uint8x8_t v1, uint8x16_t v2) {
+  // CHECK: test_vcopy_laneq_u8
+  return vcopy_laneq_u8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+uint16x4_t test_vcopy_laneq_u16(uint16x4_t v1, uint16x8_t v2) {
+  // CHECK: test_vcopy_laneq_u16
+  return vcopy_laneq_u16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+uint32x2_t test_vcopy_laneq_u32(uint32x2_t v1, uint32x4_t v2) {
+  // CHECK: test_vcopy_laneq_u32
+  return vcopy_laneq_u32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int8x16_t test_vcopyq_lane_s8(int8x16_t v1, int8x8_t v2) {
+  // CHECK: test_vcopyq_lane_s8
+  return vcopyq_lane_s8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+int16x8_t test_vcopyq_lane_s16(int16x8_t v1, int16x4_t v2) {
+  // CHECK: test_vcopyq_lane_s16
+  return vcopyq_lane_s16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+poly8x16_t test_vcopyq_lane_p8(poly8x16_t v1, poly8x8_t v2) {
+  // CHECK: test_vcopyq_lane_p8
+  return vcopyq_lane_p8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+poly16x8_t test_vcopyq_lane_p16(poly16x8_t v1, poly16x4_t v2) {
+  // CHECK: test_vcopyq_lane_p16
+  return vcopyq_lane_p16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+int32x4_t test_vcopyq_lane_s32(int32x4_t v1, int32x2_t v2) {
+  // CHECK: test_vcopyq_lane_s32
+  return vcopyq_lane_s32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int64x2_t test_vcopyq_lane_s64(int64x2_t v1, int64x1_t v2) {
+  // CHECK: test_vcopyq_lane_s64
+  return vcopyq_lane_s64(v1, 1, v2, 0);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+float32x4_t test_vcopyq_lane_f32(float32x4_t v1, float32x2_t v2) {
+  // CHECK: test_vcopyq_lane_f32
+  return vcopyq_lane_f32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float64x2_t test_vcopyq_lane_f64(float64x2_t v1, float64x1_t v2) {
+  // CHECK: test_vcopyq_lane_f64
+  return vcopyq_lane_f64(v1, 1, v2, 0);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+uint8x16_t test_vcopyq_lane_u8(uint8x16_t v1, uint8x8_t v2) {
+  // CHECK: test_vcopyq_lane_u8
+  return vcopyq_lane_u8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+uint16x8_t test_vcopyq_lane_u16(uint16x8_t v1, uint16x4_t v2) {
+  // CHECK: test_vcopyq_lane_u16
+  return vcopyq_lane_u16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+uint32x4_t test_vcopyq_lane_u32(uint32x4_t v1, uint32x2_t v2) {
+  // CHECK: test_vcopyq_lane_u32
+  return vcopyq_lane_u32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint64x2_t test_vcopyq_lane_u64(uint64x2_t v1, uint64x1_t v2) {
+  // CHECK: test_vcopyq_lane_u64
+  return vcopyq_lane_u64(v1, 1, v2, 0);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+}
+
+int8x16_t test_vcopyq_laneq_s8(int8x16_t v1, int8x16_t v2) {
+  // CHECK: test_vcopyq_laneq_s8
+  return vcopyq_laneq_s8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+int16x8_t test_vcopyq_laneq_s16(int16x8_t v1, int16x8_t v2) {
+  // CHECK: test_vcopyq_laneq_s16
+  return vcopyq_laneq_s16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+poly8x16_t test_vcopyq_laneq_p8(poly8x16_t v1, poly8x16_t v2) {
+  // CHECK: test_vcopyq_laneq_p8
+  return vcopyq_laneq_p8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+poly16x8_t test_vcopyq_laneq_p16(poly16x8_t v1, poly16x8_t v2) {
+  // CHECK: test_vcopyq_laneq_p16
+  return vcopyq_laneq_p16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+int32x4_t test_vcopyq_laneq_s32(int32x4_t v1, int32x4_t v2) {
+  // CHECK: test_vcopyq_laneq_s32
+  return vcopyq_laneq_s32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+float32x4_t test_vcopyq_laneq_f32(float32x4_t v1, float32x4_t v2) {
+  // CHECK: test_vcopyq_laneq_f32
+  return vcopyq_laneq_f32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+int64x2_t test_vcopyq_laneq_s64(int64x2_t v1, int64x2_t v2) {
+  // CHECK: test_vcopyq_laneq_s64
+  return vcopyq_laneq_s64(v1, 1, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
+}
+
+uint8x16_t test_vcopyq_laneq_u8(uint8x16_t v1, uint8x16_t v2) {
+  // CHECK: test_vcopyq_laneq_u8
+  return vcopyq_laneq_u8(v1, 5, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
+}
+
+uint16x8_t test_vcopyq_laneq_u16(uint16x8_t v1, uint16x8_t v2) {
+  // CHECK: test_vcopyq_laneq_u16
+  return vcopyq_laneq_u16(v1, 2, v2, 3);
+  // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
+}
+
+uint32x4_t test_vcopyq_laneq_u32(uint32x4_t v1, uint32x4_t v2) {
+  // CHECK: test_vcopyq_laneq_u32
+  return vcopyq_laneq_u32(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+}
+
+uint64x2_t test_vcopyq_laneq_u64(uint64x2_t v1, uint64x2_t v2) {
+  // CHECK: test_vcopyq_laneq_u64
+  return vcopyq_laneq_u64(v1, 0, v2, 1);
+  // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
 }
 
 int8x8_t test_vcreate_s8(uint64_t v1) {

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194042&r1=194041&r2=194042&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Mon Nov  4 20:05:44 2013
@@ -120,7 +120,9 @@ enum OpKind {
   OpLongHi,
   OpNarrowHi,
   OpMovlHi,
-  OpCopy
+  OpCopyLane,
+  OpCopyQLane,
+  OpCopyLaneQ
 };
 
 enum ClassKind {
@@ -265,7 +267,9 @@ public:
     OpMap["OP_LONG_HI"] = OpLongHi;
     OpMap["OP_NARROW_HI"] = OpNarrowHi;
     OpMap["OP_MOVL_HI"] = OpMovlHi;
-    OpMap["OP_COPY"] = OpCopy;
+    OpMap["OP_COPY_LN"] = OpCopyLane;
+    OpMap["OP_COPYQ_LN"] = OpCopyQLane;
+    OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
 
     Record *SI = R.getClass("SInst");
     Record *II = R.getClass("IInst");
@@ -1358,7 +1362,7 @@ static std::string GenArgs(const std::st
     }
     s.push_back(arg);
     //To avoid argument being multiple defined, add extra number for renaming.
-    if (name == "vcopy_lane")
+    if (name == "vcopy_lane" || name == "vcopy_laneq")
       s.push_back('1');
     if ((i + 1) < e)
       s += ", ";
@@ -1383,7 +1387,7 @@ static std::string GenMacroLocals(const
       continue;
     generatedLocal = true;
     bool extranumber = false;
-    if(name == "vcopy_lane")
+    if (name == "vcopy_lane" || name == "vcopy_laneq")
       extranumber = true;
 
     s += TypeString(proto[i], typestr) + " __";
@@ -1854,12 +1858,26 @@ static std::string GenOpString(const std
          MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
     break;
   }
-  case OpCopy: {
+  case OpCopyLane: {
     s += TypeString('s', typestr) + " __c2 = " +
          MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n  " +
          MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
     break;
   }
+  case OpCopyQLane: {
+    std::string typeCode = "";
+    InstructionTypeCode(typestr, ClassS, quad, typeCode);
+    s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
+         "(__c1, __d1); \\\n  vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
+    break;
+  }
+  case OpCopyLaneQ: {
+    std::string typeCode = "";
+    InstructionTypeCode(typestr, ClassS, quad, typeCode);
+    s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
+         "(__c1, __d1); \\\n  vset_lane_" + typeCode + "(__c2, __a1, __b1);";
+    break;
+  }
   default:
     PrintFatalError("unknown OpKind!");
   }





More information about the cfe-commits mailing list