[clang] 72e4bf1 - [X86] Support some missing intrinsics

via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 20 19:51:48 PDT 2021


Author: Liu, Chen3
Date: 2021-04-21T10:50:37+08:00
New Revision: 72e4bf12eec4e1526187b4f9445bc66a168552dd

URL: https://github.com/llvm/llvm-project/commit/72e4bf12eec4e1526187b4f9445bc66a168552dd
DIFF: https://github.com/llvm/llvm-project/commit/72e4bf12eec4e1526187b4f9445bc66a168552dd.diff

LOG: [X86] Support some missing intrinsics

Support for _mm512_i32logather_pd, _mm512_mask_i32logather_pd,
_mm512_i32logather_epi64, _mm512_mask_i32logather_epi64, _mm512_i32loscatter_pd,
_mm512_mask_i32loscatter_pd, _mm512_i32loscatter_epi64,
_mm512_mask_i32loscatter_epi64.

Differential Revision: https://reviews.llvm.org/D100368

Added: 
    

Modified: 
    clang/lib/Headers/avx512fintrin.h
    clang/test/CodeGen/X86/avx512f-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e60cb5f3df1f1..010bcadab0195 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9588,6 +9588,169 @@ _mm512_cvtsi512_si32(__m512i __A) {
   return __b[0];
 }
 
+/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
+/// locations starting at location \a base_addr at packed 32-bit integer indices
+/// stored in the lower half of \a vindex scaled by \a scale them in dst.
+///
+/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_pd(vindex, base_addr, scale)                        \
+  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory
+/// starting at location \a base_addr at packed 32-bit integer indices stored in
+/// the lower half of \a vindex scaled by \a scale into dst using writemask
+/// \a mask (elements are copied from \a src when the corresponding mask bit is
+/// not set).
+///
+/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///     dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+///     dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
+  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),      \
+                           (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst.
+///
+/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   dst[i+63:i] := MEM[addr+63:addr]
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
+  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
+
+/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
+/// at packed 32-bit integer indices stored in the lower half of \a vindex
+/// scaled by \a scale and stores them in dst using writemask \a mask (elements
+/// are copied from \a src when the corresponding mask bit is not set).
+///
+/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///     dst[i+63:i] := MEM[addr+63:addr]
+///   ELSE
+///     dst[i+63:i] := src[i+63:i]
+///   FI
+/// ENDFOR
+/// dst[MAX:512] := 0
+/// \endoperation
+#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
+  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex),   \
+                              (base_addr), (scale))
+
+/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
+/// and to memory locations starting at location \a base_addr at packed 32-bit
+/// integer indices stored in \a vindex scaled by \a scale.
+///
+/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   MEM[addr+63:addr] := v1[i+63:i]
+/// ENDFOR
+/// \endoperation
+#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
+  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
+
+/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
+/// to memory locations starting at location \a base_addr at packed 32-bit
+/// integer indices stored in \a vindex scaled by \a scale. Only those elements
+/// whose corresponding mask bit is set in writemask \a mask are written to
+/// memory.
+///
+/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///     MEM[addr+63:addr] := a[i+63:i]
+///   FI
+/// ENDFOR
+/// \endoperation
+#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
+  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
+                            _mm512_castsi512_si256(vindex), (v1), (scale))
+
+/// Stores 8 packed 64-bit integer elements located in \a v1 and stores them in
+/// memory locations starting at location \a base_addr at packed 32-bit integer
+/// indices stored in \a vindex scaled by \a scale.
+///
+/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///   MEM[addr+63:addr] := a[i+63:i]
+/// ENDFOR
+/// \endoperation
+#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
+  _mm512_i32scatter_epi64((base_addr),                                         \
+                          _mm512_castsi512_si256(vindex), (v1), (scale))
+
+/// Stores 8 packed 64-bit integer elements located in a and stores them in
+/// memory locations starting at location \a base_addr at packed 32-bit integer
+/// indices stored in \a vindex scaled by scale using writemask \a mask (elements
+/// whose corresponding mask bit is not set are not written to memory).
+///
+/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
+///
+/// \operation
+/// FOR j := 0 to 7
+///   i := j*64
+///   m := j*32
+///   IF mask[j]
+///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
+///     MEM[addr+63:addr] := a[i+63:i]
+///   FI
+/// ENDFOR
+/// \endoperation
+#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)     \
+  _mm512_mask_i32scatter_epi64((base_addr), (mask),                            \
+                               _mm512_castsi512_si256(vindex), (v1), (scale))
+
 #undef __DEFAULT_FN_ATTRS512
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS

diff  --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 173b7267270e0..dbab32db5d384 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -10819,3 +10819,51 @@ __m512i test_mm512_zextsi256_si512(__m256i A) {
   // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm512_zextsi256_si512(A);
 }
+
+__m512d test_mm512_i32logather_pd(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32logather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32logather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_pd
+  // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32logather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32loscatter_pd(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_pd
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32loscatter_pd(__addr, __mask, __index, __v1, 2);
+}
+
+__m512i test_mm512_i32logather_epi64(__m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32logather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32logather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CHECK-LABEL: @test_mm512_mask_i32logather_epi64
+  // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32logather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+void test_mm512_i32loscatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_i32loscatter_epi64(__addr, __index, __v1, 2);
+}
+
+void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CHECK-LABEL: @test_mm512_mask_i32loscatter_epi64
+  // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
+  _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
+}


        


More information about the cfe-commits mailing list