r201112 - [AArch64] Fixed vget/vset_lane_f16 implementation
Ana Pazos
apazos at codeaurora.org
Mon Feb 10 13:20:53 PST 2014
Author: apazos
Date: Mon Feb 10 15:20:53 2014
New Revision: 201112
URL: http://llvm.org/viewvc/llvm-project?rev=201112&view=rev
Log:
[AArch64] Fixed vget/vset_lane_f16 implementation
Replaced cast and vreinterpret operations with
code that bitwise-reinterprets values between the
float16_t and int16_t types.
Modified:
cfe/trunk/test/CodeGen/aarch64-neon-copy.c
cfe/trunk/utils/TableGen/NeonEmitter.cpp
Modified: cfe/trunk/test/CodeGen/aarch64-neon-copy.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-copy.c?rev=201112&r1=201111&r2=201112&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-copy.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-copy.c Mon Feb 10 15:20:53 2014
@@ -1244,79 +1244,128 @@ float64x2_t test_vcopyq_laneq_f64(float6
return vcopyq_laneq_f64(a, 1, c, 1);
}
-// CHECK: test_vget_lane_f16
+// CHECK-LABEL: test_vget_lane_f16
int test_vget_lane_f16(float16x4_t v1) {
float16_t a = vget_lane_f16(v1, 3);
return (int)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
-// CHECK: test_vgetq_lane_f16
+// CHECK-LABEL: test_vgetq_lane_f16
int test_vgetq_lane_f16(float16x8_t v1) {
float16_t a = vgetq_lane_f16(v1, 7);
return (int)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
-// CHECK: test_vget_lane_f16_2
-float test_vget_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vget_lane_f16
+float test2_vget_lane_f16(float16x4_t v1) {
float16_t a = vget_lane_f16(v1, 3);
return (float)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
-// CHECK: test_vgetq_lane_f16_2
-float test_vgetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test2_vgetq_lane_f16
+float test2_vgetq_lane_f16(float16x8_t v1) {
float16_t a = vgetq_lane_f16(v1, 7);
return (float)a;
// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
-// CHECK: test_vset_lane_f16
+// CHECK-LABEL: test_vset_lane_f16
float16x4_t test_vset_lane_f16(float16x4_t v1) {
- float16_t a;
+ float16_t a = 0.0;
return vset_lane_f16(a, v1, 3);
-// CHECK: fmov {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[3], wzr
}
-// CHECK: test_vsetq_lane_f16
+// CHECK-LABEL: test_vsetq_lane_f16
float16x8_t test_vsetq_lane_f16(float16x8_t v1) {
- float16_t a;
+ float16_t a = 0.0;
return vsetq_lane_f16(a, v1, 7);
-// CHECK: fmov {{s[0-9]+}}, wzr
-// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0]
+// CHECK: ins {{v[0-9]+}}.h[7], wzr
}
-// CHECK: test_vset_lane_f16_2
-float16x4_t test_vset_lane_f16_2(float16x4_t v1) {
+// CHECK-LABEL: test2_vset_lane_f16
+float16x4_t test2_vset_lane_f16(float16x4_t v1) {
+ float16_t a = 1.0;
+ return vset_lane_f16(a, v1, 3);
+// CHECK: movz {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test2_vsetq_lane_f16
+float16x8_t test2_vsetq_lane_f16(float16x8_t v1) {
+ float16_t a = 1.0;
+ return vsetq_lane_f16(a, v1, 7);
+// CHECK: movz {{w[0-9]+}}, #15360
+// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test_vget_vset_lane_f16
+float16x4_t test_vget_vset_lane_f16(float16x4_t v1) {
float16_t a = vget_lane_f16(v1, 0);
return vset_lane_f16(a, v1, 3);
// CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0]
}
-// CHECK: test_vsetq_lane_f16_2
-float16x8_t test_vsetq_lane_f16_2(float16x8_t v1) {
+// CHECK-LABEL: test_vgetq_vsetq_lane_f16
+float16x8_t test_vgetq_vsetq_lane_f16(float16x8_t v1) {
float16_t a = vgetq_lane_f16(v1, 0);
return vsetq_lane_f16(a, v1, 7);
// CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0]
}
+// CHECK-LABEL: test4_vset_lane_f16
+float16x4_t test4_vset_lane_f16(float16x4_t v1, float b, float c) {
+ float16_t a = (float16_t)b;
+ return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
-// CHECK: test_vsetq_lane_f16_3
-float16x8_t test_vsetq_lane_f16_3(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test4_vsetq_lane_f16
+float16x8_t test4_vsetq_lane_f16(float16x8_t v1, float b, float c) {
float16_t a = (float16_t)b;
return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
}
-// CHECK: test_vsetq_lane_f16_4
-float16x8_t test_vsetq_lane_f16_4(float16x8_t v1, float b, float c) {
+// CHECK-LABEL: test5_vset_lane_f16
+float16x4_t test5_vset_lane_f16(float16x4_t v1, float b, float c) {
+ float16_t a = (float16_t)b;
+ return vset_lane_f16(a, v1, 3);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
+// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
+}
+
+// CHECK-LABEL: test5_vsetq_lane_f16
+float16x8_t test5_vsetq_lane_f16(float16x8_t v1, float b, float c) {
float16_t a = (float16_t)b + 1.0;
return vsetq_lane_f16(a, v1, 7);
+// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
}
+// CHECK-LABEL: test_vset_vget_lane_f16
+int test_vset_vget_lane_f16(float16x4_t a) {
+ float16x4_t b;
+ b = vset_lane_f16(3.5, a, 3);
+ float16_t c = vget_lane_f16(b, 3);
+ return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
+// CHECK-LABEL: test_vsetq_vgetq_lane_f16
+int test_vsetq_vgetq_lane_f16(float16x8_t a) {
+ float16x8_t b;
+ b = vsetq_lane_f16(3.5, a, 5);
+ float16_t c = vgetq_lane_f16(b, 5);
+ return (int)c;
+// CHECK: movz x{{[0-9]+}}, #3
+}
+
// CHECK-LABEL: test_vdup_laneq_p64:
poly64x1_t test_vdup_laneq_p64(poly64x2_t vec) {
return vdup_laneq_p64(vec, 0);
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=201112&r1=201111&r2=201112&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Mon Feb 10 15:20:53 2014
@@ -2272,28 +2272,39 @@ static std::string GenOpString(const std
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- if (quad) {
- s += "int16x8_t __a2 = vreinterpretq_s16_f16(__a1);\\\n";
- s += " vgetq_lane_s16(__a2, __b);";
- } else {
- s += "int16x4_t __a2 = vreinterpret_s16_f16(__a1);\\\n";
- s += " vget_lane_s16(__a2, __b);";
- }
+
+ std::string intType = quad ? "int16x8_t" : "int16x4_t";
+ std::string intName = quad ? "vgetq" : "vget";
+
+ // reinterpret float16 vector as int16 vector
+ s += intType + " __a2 = *(" + intType + " *)(&__a1);\\\n";
+
+ s += " int16_t __a3 = " + intName + "_lane_s16(__a2, __b);\\\n";
+
+ // reinterpret int16_t as float16_t
+ s += " float16_t __a4 = *(float16_t *)(&__a3);\\\n";
+ s += " __a4;";
break;
}
case OpScalarSetLane:{
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
- s += TypeString(proto[1], typestr) + " __a1 = __a; \\\n ";
- if (quad) {
- s += " int16x8_t __b2 = vreinterpretq_s16_f16(b);\\\n";
- s += " int16x8_t __b3 = vsetq_lane_s16(__a1, __b2, __c);\\\n";
- s += " vreinterpretq_f16_s16(__b3);";
- } else {
- s += " int16x4_t __b2 = vreinterpret_s16_f16(b);\\\n";
- s += " int16x4_t __b3 = vset_lane_s16(__a1, __b2, __c);\\\n";
- s += " vreinterpret_f16_s16(__b3);";
- }
+ s += TypeString(proto[1], typestr) + " __a1 = __a;\\\n ";
+
+ std::string origType = quad ? "float16x8_t" : "float16x4_t";
+ std::string intType = quad ? "int16x8_t" : "int16x4_t";
+ std::string intName = quad ? "vsetq" : "vset";
+
+ // reinterpret float16_t as int16_t
+ s += "int16_t __a2 = *(int16_t *)(&__a1);\\\n";
+ // reinterpret float16 vector as int16 vector
+ s += " " + intType + " __b2 = *(" + intType + " *)(&__b);\\\n";
+
+ s += " " + intType + " __b3 = " + intName + "_lane_s16(__a2, __b2, __c);\\\n";
+
+ // reinterpret int16 vector as float16 vector
+ s += " " + origType + " __b4 = *(" + origType + " *)(&__b3);\\\n";
+ s += "__b4;";
break;
}
More information about the cfe-commits
mailing list