[PATCH] D89242: [PowerPC] [Clang] Port SSE4.1-compatible insert intrinsics

Qiu Chaofan via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 12 08:02:15 PDT 2020


qiucf created this revision.
qiucf added reviewers: nemanjai, jsji, kbarton, PowerPC, wschmidt, rsmith.
Herald added subscribers: cfe-commits, steven.zhang, pengfei, shchenz.
Herald added a project: clang.
qiucf requested review of this revision.

This patch adds three intrinsics compatible to x86's SSE 4.1 on PowerPC target, with tests:

- _mm_insert_epi8
- _mm_insert_epi32
- _mm_insert_epi64

The intrinsics implementation is contributed by Paul Clarke.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D89242

Files:
  clang/lib/Headers/ppc_wrappers/smmintrin.h
  clang/test/CodeGen/ppc-smmintrin.c


Index: clang/test/CodeGen/ppc-smmintrin.c
===================================================================
--- clang/test/CodeGen/ppc-smmintrin.c
+++ clang/test/CodeGen/ppc-smmintrin.c
@@ -116,3 +116,32 @@
 // CHECK-NEXT: [[REG80:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG76]], <16 x i8> [[REG78]], <16 x i8> [[REG79]])
 // CHECK-NEXT: [[REG81:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG80]] to <2 x i64>
 // CHECK-NEXT: ret <2 x i64> [[REG81]]
+
+void __attribute__((noinline))
+test_insert() {
+  _mm_insert_epi8(m1, 1, 0);
+  _mm_insert_epi32(m1, 1, 0);
+  _mm_insert_epi64(m1, 0xFFFFFFFF1L, 0);
+}
+
+// CHECK-LABEL: @test_insert
+
+// CHECK: define available_externally <2 x i64> @_mm_insert_epi8(<2 x i64> {{[0-9a-zA-Z_%.]+}}, i32 signext {{[0-9a-zA-Z_%.]+}}, i32 signext {{[0-9a-zA-Z_%.]+}})
+// CHECK: %{{[0-9a-zA-Z_.]+}} = bitcast <2 x i64> %{{[0-9a-zA-Z_.]+}} to <16 x i8>
+// CHECK: %[[R0:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
+// CHECK: %[[R1:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 15
+// CHECK: %{{[0-9a-zA-Z_.]+}} = insertelement <16 x i8> %{{[0-9a-zA-Z_.]+}}, i8 %[[R0]], i32 %[[R1]]
+// CHECK: %[[R2:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: ret <2 x i64> %[[R2]]
+
+// CHECK: define available_externally <2 x i64> @_mm_insert_epi32(<2 x i64> {{[0-9a-zA-Z_%.]+}}, i32 signext {{[0-9a-zA-Z_%.]+}}, i32 signext {{[0-9a-zA-Z_%.]+}})
+// CHECK: %{{[0-9a-zA-Z_.]+}} = bitcast <2 x i64> %{{[0-9a-zA-Z_.]+}} to <4 x i32>
+// CHECK: %[[R0:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
+// CHECK: %{{[0-9a-zA-Z_.]+}} = insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 %[[R0]]
+// CHECK: %[[R1:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
+// CHECK: ret <2 x i64> %[[R1]]
+
+// CHECK: define available_externally <2 x i64> @_mm_insert_epi64(<2 x i64> {{[0-9a-zA-Z_%.]+}}, i64 {{[0-9a-zA-Z_%.]+}}, i32 signext {{[0-9a-zA-Z_%.]+}})
+// CHECK: %[[R0:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 1
+// CHECK: %{{[0-9a-zA-Z_.]+}} = insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %{{[0-9a-zA-Z_.]+}}, i32 %[[R0:[0-9a-zA-Z_.]+]]
+// CHECK: ret <2 x i64> %{{[0-9a-zA-Z_.]+}}
Index: clang/lib/Headers/ppc_wrappers/smmintrin.h
===================================================================
--- clang/lib/Headers/ppc_wrappers/smmintrin.h
+++ clang/lib/Headers/ppc_wrappers/smmintrin.h
@@ -78,6 +78,30 @@
   return (__m128i)vec_sel((__v16qu)__A, (__v16qu)__B, __lmask);
 }
 
+extern __inline __m128i
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
+  __v16qi result = (__v16qi)__A;
+  result[__N & 0xf] = __D;
+  return (__m128i)result;
+}
+
+extern __inline __m128i
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
+  __v4si result = (__v4si)__A;
+  result[__N & 3] = __D;
+  return (__m128i)result;
+}
+
+extern __inline __m128i
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
+  __v2di result = (__v2di)__A;
+  result[__N & 1] = __D;
+  return (__m128i)result;
+}
+
 #else
 #include_next <smmintrin.h>
 #endif /* defined(__linux__) && defined(__ppc64__) */


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D89242.297589.patch
Type: text/x-patch
Size: 3476 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201012/ea981e31/attachment.bin>


More information about the cfe-commits mailing list