r201112 - [AArch64] Fixed vget/vset_lane_f16 implementation

Mon Feb 10 13:20:53 PST 2014

Author: apazos
Date: Mon Feb 10 15:20:53 2014
New Revision: 201112

URL: http://llvm.org/viewvc/llvm-project?rev=201112&view=rev
Log:
[AArch64] Fixed vget/vset_lane_f16 implementation

Replaced cast and vreinterpret operations with
code that bitwise-reinterprets between the types
float16_t and int16_t.


Modified:
    cfe/trunk/test/CodeGen/aarch64-neon-copy.c
    cfe/trunk/utils/TableGen/NeonEmitter.cpp

Modified: cfe/trunk/test/CodeGen/aarch64-neon-copy.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-copy.c?rev=201112&r1=201111&r2=201112&view=diff
==============================================================================

--- cfe/trunk/test/CodeGen/aarch64-neon-copy.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-copy.c Mon Feb 10 15:20:53 2014
@@ -1244,79 +1244,128 @@ float64x2_t test_vcopyq_laneq_f64(float6
   return vcopyq_laneq_f64(a, 1, c, 1);
 }
 
-// CHECK: test_vget_lane_f16
+// CHECK-LABEL: test_vget_lane_f16
 int test_vget_lane_f16(float16x4_t v1) {
   float16_t a = vget_lane_f16(v1, 3);
   return (int)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
 }
 
-// CHECK: test_vgetq_lane_f16
+// CHECK-LABEL: test_vgetq_lane_f16
 int test_vgetq_lane_f16(float16x8_t v1) {
   float16_t a = vgetq_lane_f16(v1, 7);
   return (int)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
 }
 
-// CHECK: test_vget_lane_f16_2
-float test_vget_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vget_lane_f16
+float test2_vget_lane_f16(float16x4_t v1) {
   float16_t a = vget_lane_f16(v1, 3);
   return (float)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
 }
 
-// CHECK: test_vgetq_lane_f16_2
-float test_vgetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test2_vgetq_lane_f16
+float test2_vgetq_lane_f16(float16x8_t v1) {
   float16_t a = vgetq_lane_f16(v1, 7);
   return (float)a;
 // CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
 }
 
-// CHECK: test_vset_lane_f16
+// CHECK-LABEL: test_vset_lane_f16
 float16x4_t test_vset_lane_f16(float16x4_t v1) {
-  float16_t a;
+  float16_t a = 0.0;
   return vset_lane_f16(a, v1, 3);
-// CHECK: fmov  {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[3],  {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[3], wzr
 }
 
-// CHECK: test_vsetq_lane_f16
+// CHECK-LABEL: test_vsetq_lane_f16
 float16x8_t test_vsetq_lane_f16(float16x8_t v1) {
-  float16_t a;
+  float16_t a = 0.0;
   return vsetq_lane_f16(a, v1, 7);
-// CHECK: fmov  {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[7],  {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[7], wzr
 }
 
-// CHECK: test_vset_lane_f16_2
-float16x4_t test_vset_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vset_lane_f16
+float16x4_t test2_vset_lane_f16(float16x4_t v1) {
+  float16_t a = 1.0;
+  return vset_lane_f16(a, v1, 3);
+// CHECK:  movz    {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test2_vsetq_lane_f16
+float16x8_t test2_vsetq_lane_f16(float16x8_t v1) {
+  float16_t a = 1.0;
+  return vsetq_lane_f16(a, v1, 7);
+// CHECK:  movz    {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[7],  {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test_vget_vset_lane_f16
+float16x4_t test_vget_vset_lane_f16(float16x4_t v1) {
   float16_t a = vget_lane_f16(v1, 0);
   return vset_lane_f16(a, v1, 3);
 // CHECK: ins {{v[0-9]+}}.h[3],  {{v[0-9]+}}.h[0]
 }
 
-// CHECK: test_vsetq_lane_f16_2
-float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test_vgetq_vsetq_lane_f16
+float16x8_t test_vgetq_vsetq_lane_f16(float16x8_t v1) {
   float16_t a = vgetq_lane_f16(v1, 0);
   return vsetq_lane_f16(a, v1, 7);
 // CHECK: ins {{v[0-9]+}}.h[7],  {{v[0-9]+}}.h[0]
 }
 
+// CHECK-LABEL: test4_vset_lane_f16
+float16x4_t test4_vset_lane_f16(float16x4_t v1, float b, float c) {
+  float16_t a = (float16_t)b;
+  return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3],  {{w[0-9]+}}
+}
 
-// CHECK: test_vsetq_lane_f16_3
-float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test4_vsetq_lane_f16
+float16x8_t test4_vsetq_lane_f16(float16x8_t v1, float b, float c) {
   float16_t a = (float16_t)b;
   return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
 // CHECK: ins {{v[0-9]+}}.h[7],  {{w[0-9]+}}
 }
 
-// CHECK: test_vsetq_lane_f16_4
-float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test5_vset_lane_f16
+float16x4_t test5_vset_lane_f16(float16x4_t v1, float b, float c) {
+  float16_t a = (float16_t)b;
+  return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3],  {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test5_vsetq_lane_f16
+float16x8_t test5_vsetq_lane_f16(float16x8_t v1, float b, float c) {
   float16_t a = (float16_t)b + 1.0;
   return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}},  {{s[0-9]+}}
 // CHECK: ins {{v[0-9]+}}.h[7],  {{w[0-9]+}}
 }
 
+// CHECK-LABEL: test_vset_vget_lane_f16
+int test_vset_vget_lane_f16(float16x4_t a) {
+  float16x4_t b;
+  b = vset_lane_f16(3.5, a, 3);
+  float16_t c = vget_lane_f16(b, 3);
+  return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
+// CHECK-LABEL: test_vsetq_vgetq_lane_f16
+int test_vsetq_vgetq_lane_f16(float16x8_t a) {
+  float16x8_t b;
+  b = vsetq_lane_f16(3.5, a, 5);
+  float16_t c = vgetq_lane_f16(b, 5);
+  return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
 // CHECK-LABEL: test_vdup_laneq_p64:
 poly64x1_t test_vdup_laneq_p64(poly64x2_t vec) {
   return vdup_laneq_p64(vec, 0);

Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=201112&r1=201111&r2=201112&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Mon Feb 10 15:20:53 2014
@@ -2272,28 +2272,39 @@ static std::string GenOpString(const std
     std::string typeCode = "";
     InstructionTypeCode(typestr, ClassS, quad, typeCode);
     s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
-    if (quad) {
-     s += "int16x8_t __a2 = vreinterpretq_s16_f16(__a1);\\\n";
-     s += "  vgetq_lane_s16(__a2, __b);";
-    } else {
-     s += "int16x4_t __a2 = vreinterpret_s16_f16(__a1);\\\n";
-     s += "  vget_lane_s16(__a2, __b);";
-    }
+
+    std::string intType = quad ? "int16x8_t" : "int16x4_t";
+    std::string intName = quad ? "vgetq" : "vget";
+
+    // reinterpret float16 vector as int16 vector
+    s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
+
+    s += "  int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
+
+    // reinterpret int16_t scalar as float16_t scalar
+    s += "  float16_t __a4 = *(float16_t *)(&__a3);\\\n";
+    s += "  __a4;";
     break;
   }
   case OpScalarSetLane:{
     std::string typeCode = "";
     InstructionTypeCode(typestr, ClassS, quad, typeCode);
-    s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n  ";
-    if (quad) {
-     s += "  int16x8_t __b2 = vreinterpretq_s16_f16(b);\\\n";
-     s += "  int16x8_t __b3 = vsetq_lane_s16(__a1, __b2, __c);\\\n";
-     s += "  vreinterpretq_f16_s16(__b3);";
-    } else {
-     s += "  int16x4_t __b2 = vreinterpret_s16_f16(b);\\\n";
-     s += "  int16x4_t __b3 = vset_lane_s16(__a1, __b2, __c);\\\n";
-     s += "  vreinterpret_f16_s16(__b3);";
-    }
+    s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n  ";
+
+    std::string origType = quad ? "float16x8_t" : "float16x4_t";
+    std::string intType = quad ? "int16x8_t" : "int16x4_t";
+    std::string intName = quad ? "vsetq" : "vset";
+
+    // reinterpret float16_t as int16_t
+    s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
+    // reinterpret float16 vector as int16 vector
+    s += "  " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
+
+    s += "  " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
+
+    // reinterpret int16 vector as float16 vector
+    s += "  " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
+    s += "__b4;";
     break;
   }