[clang] [CIR] Upstream gather intrinsics (PR #169157)
Jasmine Tang via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 1 05:16:43 PST 2025
https://github.com/badumbatish updated https://github.com/llvm/llvm-project/pull/169157
>From 6bfbce8ef9c95726c35fda6f5e0a7b268658f7cd Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Mon, 1 Dec 2025 05:12:20 -0800
Subject: [PATCH 1/3] Rebase from main, use new func sig and add nyi case
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 93 +++++++++++++++++-
.../CodeGenBuiltins/X86/avx512f-builtins.c | 95 +++++++++++++++++++
2 files changed, 187 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b242efc00e491..11deb2ce0cb03 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -487,6 +487,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_compressqi128_mask:
case X86::BI__builtin_ia32_compressqi256_mask:
case X86::BI__builtin_ia32_compressqi512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_gather3div2df:
case X86::BI__builtin_ia32_gather3div2di:
case X86::BI__builtin_ia32_gather3div4df:
@@ -510,7 +514,94 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_gathersiv8di:
case X86::BI__builtin_ia32_gathersiv16si:
case X86::BI__builtin_ia32_gatherdiv8di:
- case X86::BI__builtin_ia32_gatherdiv16si:
+ case X86::BI__builtin_ia32_gatherdiv16si: {
+ StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_gather3div2df:
+ intrinsicName = "x86.avx512.mask.gather3div2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div2di:
+ intrinsicName = "x86.avx512.mask.gather3div2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4df:
+ intrinsicName = "x86.avx512.mask.gather3div4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div4di:
+ intrinsicName = "x86.avx512.mask.gather3div4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4sf:
+ intrinsicName = "x86.avx512.mask.gather3div4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div4si:
+ intrinsicName = "x86.avx512.mask.gather3div4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3div8sf:
+ intrinsicName = "x86.avx512.mask.gather3div8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div8si:
+ intrinsicName = "x86.avx512.mask.gather3div8.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2df:
+ intrinsicName = "x86.avx512.mask.gather3siv2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2di:
+ intrinsicName = "x86.avx512.mask.gather3siv2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4df:
+ intrinsicName = "x86.avx512.mask.gather3siv4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4di:
+ intrinsicName = "x86.avx512.mask.gather3siv4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ intrinsicName = "x86.avx512.mask.gather3siv4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4si:
+ intrinsicName = "x86.avx512.mask.gather3siv4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ intrinsicName = "x86.avx512.mask.gather3siv8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8si:
+ intrinsicName = "x86.avx512.mask.gather3siv8.si";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8df:
+ intrinsicName = "x86.avx512.mask.gather.dpd.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.dps.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ intrinsicName = "x86.avx512.mask.gather.qpd.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.qps.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8di:
+ intrinsicName = "x86.avx512.mask.gather.dpq.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16si:
+ intrinsicName = "x86.avx512.mask.gather.dpi.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ intrinsicName = "x86.avx512.mask.gather.qpq.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16si:
+ intrinsicName = "x86.avx512.mask.gather.qpi.512";
+ break;
+ }
+
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned minElts =
+ std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
+ cast<cir::VectorType>(ops[2].getType()).getSize());
+ ops[3] =
+ getMaskVecValue(builder, loc, ops[3], minElts);
+ return emitIntrinsicCallOp(builder, loc,intrinsicName.str(),
+ convertType(expr->getType()), ops);
+ }
case X86::BI__builtin_ia32_scattersiv8df:
case X86::BI__builtin_ia32_scattersiv16sf:
case X86::BI__builtin_ia32_scatterdiv8df:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index 31d6bc3d22408..a9a4eb3406a45 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -228,3 +228,98 @@ __mmask16 test_kmov_w(__mmask16 A) {
// OGCG: bitcast <16 x i1> {{.*}} to i16
return __builtin_ia32_kmovw(A);
}
+__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.qps.512
+ return _mm512_i64gather_ps(__index, __addr, 2);
+}
+
+__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.qps.512
+ return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
+ return _mm512_i64gather_epi32(__index, __addr, 2);
+}
+
+__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
+ return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
+ return _mm512_i64gather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
+ return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
+ return _mm512_i64gather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
+ return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.dps.512
+ return _mm512_i32gather_ps(__index, __addr, 2);
+}
+
+__m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.dps.512
+ return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2);
+}
+
+__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
+ return _mm512_i32gather_epi32(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
+ return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+ return _mm512_i32gather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+ return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+ return _mm512_i32gather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+ return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
>From 060fa3f87ba15611404822728077da3d5e6d62be Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Sat, 22 Nov 2025 01:56:09 -0800
Subject: [PATCH 2/3] Add clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
---
.../CodeGenBuiltins/X86/avx512vl-builtins.c | 151 ++++++++++++++++++
1 file changed, 151 insertions(+)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
new file mode 100644
index 0000000000000..08b40ad033b24
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
@@ -0,0 +1,151 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+
+#include <immintrin.h>
+
+__m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mmask_i64gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div2.df"
+
+ // LLVM-LABEL: @test_mm_mmask_i64gather_pd
+ // LLVM: @llvm.x86.avx512.mask.gather3div2.df
+ return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mmask_i64gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div2.di"
+
+ // LLVM-LABEL: @test_mm_mmask_i64gather_epi64
+ // LLVM: @llvm.x86.avx512.mask.gather3div2.di
+ return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mmask_i64gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.df"
+
+ // LLVM-LABEL: @test_mm256_mmask_i64gather_pd
+ // LLVM: @llvm.x86.avx512.mask.gather3div4.df
+ return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mmask_i64gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.di"
+
+ // LLVM-LABEL: @test_mm256_mmask_i64gather_epi64
+ // LLVM: @llvm.x86.avx512.mask.gather3div4.di
+ return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mmask_i64gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.sf"
+
+ // LLVM-LABEL: @test_mm_mmask_i64gather_ps
+ // LLVM: @llvm.x86.avx512.mask.gather3div4.sf
+ return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mmask_i64gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div4.si"
+
+ // LLVM-LABEL: @test_mm_mmask_i64gather_epi32
+ // LLVM: @llvm.x86.avx512.mask.gather3div4.si
+ return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mmask_i64gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div8.sf"
+
+ // LLVM-LABEL: @test_mm256_mmask_i64gather_ps
+ // LLVM: @llvm.x86.avx512.mask.gather3div8.sf
+ return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mmask_i64gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3div8.si"
+
+ // LLVM-LABEL: @test_mm256_mmask_i64gather_epi32
+ // LLVM: @llvm.x86.avx512.mask.gather3div8.si
+ return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mask_i32gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv2.df"
+
+ // LLVM-LABEL: @test_mm_mask_i32gather_pd
+ // LLVM: @llvm.x86.avx512.mask.gather3siv2.df
+ return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mask_i32gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv2.di"
+
+ // LLVM-LABEL: @test_mm_mask_i32gather_epi64
+ // LLVM: @llvm.x86.avx512.mask.gather3siv2.di
+ return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mask_i32gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.df"
+
+ // LLVM-LABEL: @test_mm256_mask_i32gather_pd
+ // LLVM: @llvm.x86.avx512.mask.gather3siv4.df
+ return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mask_i32gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.di"
+
+ // LLVM-LABEL: @test_mm256_mask_i32gather_epi64
+ // LLVM: @llvm.x86.avx512.mask.gather3siv4.di
+ return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mask_i32gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.sf"
+
+ // LLVM-LABEL: @test_mm_mask_i32gather_ps
+ // LLVM: @llvm.x86.avx512.mask.gather3siv4.sf
+ return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+ // CIR-LABEL: test_mm_mask_i32gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv4.si"
+
+ // LLVM-LABEL: @test_mm_mask_i32gather_epi32
+ // LLVM: @llvm.x86.avx512.mask.gather3siv4.si
+ return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mask_i32gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv8.sf"
+
+ // LLVM-LABEL: @test_mm256_mask_i32gather_ps
+ // LLVM: @llvm.x86.avx512.mask.gather3siv8.sf
+ return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CIR-LABEL: test_mm256_mask_i32gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather3siv8.si"
+
+ // LLVM-LABEL: @test_mm256_mask_i32gather_epi32
+ // LLVM: @llvm.x86.avx512.mask.gather3siv8.si
+ return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
>From 572f323c4eee36f19d79731898723a2f02035c13 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Tue, 25 Nov 2025 12:08:16 -0800
Subject: [PATCH 3/3] Change getMaskValue usage, add LLVM and OGCG test
---
.../CodeGenBuiltins/X86/avx512f-builtins.c | 160 ++++++++++++++----
.../CodeGenBuiltins/X86/avx512vl-builtins.c | 50 ++++++
2 files changed, 178 insertions(+), 32 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index a9a4eb3406a45..9d957f5de554d 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -229,97 +229,193 @@ __mmask16 test_kmov_w(__mmask16 A) {
return __builtin_ia32_kmovw(A);
}
__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i64gather_ps
- // CHECK: @llvm.x86.avx512.mask.gather.qps.512
+ // CIR-LABEL: test_mm512_i64gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qps.512"
+
+ // LLVM-LABEL: test_mm512_i64gather_ps
+ // LLVM: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
+
+ // OGCG-LABEL: test_mm512_i64gather_ps
+ // OGCG: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
return _mm512_i64gather_ps(__index, __addr, 2);
}
__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i64gather_ps
- // CHECK: @llvm.x86.avx512.mask.gather.qps.512
+ // CIR-LABEL: test_mm512_mask_i64gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qps.512"
+
+ // LLVM-LABEL: test_mm512_mask_i64gather_ps
+ // LLVM: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
+
+ // OGCG-LABEL: test_mm512_mask_i64gather_ps
+ // OGCG: call <8 x float> @llvm.x86.avx512.mask.gather.qps.512
return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}
__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i64gather_epi32
- // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
+ // CIR-LABEL: test_mm512_i64gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpi.512"
+
+ // LLVM-LABEL: test_mm512_i64gather_epi32
+ // LLVM: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
+
+ // OGCG-LABEL: test_mm512_i64gather_epi32
+ // OGCG: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
return _mm512_i64gather_epi32(__index, __addr, 2);
}
__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i64gather_epi32
- // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
+ // CIR-LABEL: test_mm512_mask_i64gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpi.512"
+
+ // LLVM-LABEL: test_mm512_mask_i64gather_epi32
+ // LLVM: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
+
+ // OGCG-LABEL: test_mm512_mask_i64gather_epi32
+ // OGCG: call <8 x i32> @llvm.x86.avx512.mask.gather.qpi.512
return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i64gather_pd
- // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
+ // CIR-LABEL: test_mm512_i64gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpd.512"
+
+ // LLVM-LABEL: test_mm512_i64gather_pd
+ // LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
+
+ // OGCG-LABEL: test_mm512_i64gather_pd
+ // OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
return _mm512_i64gather_pd(__index, __addr, 2);
}
__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i64gather_pd
- // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
+ // CIR-LABEL: test_mm512_mask_i64gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpd.512"
+
+ // LLVM-LABEL: test_mm512_mask_i64gather_pd
+ // LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
+
+ // OGCG-LABEL: test_mm512_mask_i64gather_pd
+ // OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.qpd.512
return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i64gather_epi64
- // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
+ // CIR-LABEL: test_mm512_i64gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpq.512"
+
+ // LLVM-LABEL: test_mm512_i64gather_epi64
+ // LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
+
+ // OGCG-LABEL: test_mm512_i64gather_epi64
+ // OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
return _mm512_i64gather_epi64(__index, __addr, 2);
}
__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i64gather_epi64
- // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
+ // CIR-LABEL: test_mm512_mask_i64gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.qpq.512"
+
+ // LLVM-LABEL: test_mm512_mask_i64gather_epi64
+ // LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
+
+ // OGCG-LABEL: test_mm512_mask_i64gather_epi64
+ // OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.qpq.512
return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i32gather_ps
- // CHECK: @llvm.x86.avx512.mask.gather.dps.512
+ // CIR-LABEL: test_mm512_i32gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dps.512"
+
+ // LLVM-LABEL: test_mm512_i32gather_ps
+ // LLVM: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
+
+ // OGCG-LABEL: test_mm512_i32gather_ps
+ // OGCG: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
return _mm512_i32gather_ps(__index, __addr, 2);
}
__m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i32gather_ps
- // CHECK: @llvm.x86.avx512.mask.gather.dps.512
+ // CIR-LABEL: test_mm512_mask_i32gather_ps
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dps.512"
+
+ // LLVM-LABEL: test_mm512_mask_i32gather_ps
+ // LLVM: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
+
+ // OGCG-LABEL: test_mm512_mask_i32gather_ps
+ // OGCG: call <16 x float> @llvm.x86.avx512.mask.gather.dps.512
return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2);
}
__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i32gather_epi32
- // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
+ // CIR-LABEL: test_mm512_i32gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpi.512"
+
+ // LLVM-LABEL: test_mm512_i32gather_epi32
+ // LLVM: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
+
+ // OGCG-LABEL: test_mm512_i32gather_epi32
+ // OGCG: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
return _mm512_i32gather_epi32(__index, __addr, 2);
}
__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i32gather_epi32
- // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
+ // CIR-LABEL: test_mm512_mask_i32gather_epi32
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpi.512"
+
+ // LLVM-LABEL: test_mm512_mask_i32gather_epi32
+ // LLVM: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
+
+ // OGCG-LABEL: test_mm512_mask_i32gather_epi32
+ // OGCG: call <16 x i32> @llvm.x86.avx512.mask.gather.dpi.512
return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i32gather_pd
- // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+ // CIR-LABEL: test_mm512_i32gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpd.512"
+
+ // LLVM-LABEL: test_mm512_i32gather_pd
+ // LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
+
+ // OGCG-LABEL: test_mm512_i32gather_pd
+ // OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
return _mm512_i32gather_pd(__index, __addr, 2);
}
__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i32gather_pd
- // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+ // CIR-LABEL: test_mm512_mask_i32gather_pd
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpd.512"
+
+ // LLVM-LABEL: test_mm512_mask_i32gather_pd
+ // LLVM: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
+
+ // OGCG-LABEL: test_mm512_mask_i32gather_pd
+ // OGCG: call <8 x double> @llvm.x86.avx512.mask.gather.dpd.512
return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}
__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_i32gather_epi64
- // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+ // CIR-LABEL: test_mm512_i32gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpq.512"
+
+ // LLVM-LABEL: test_mm512_i32gather_epi64
+ // LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
+
+ // OGCG-LABEL: test_mm512_i32gather_epi64
+ // OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
return _mm512_i32gather_epi64(__index, __addr, 2);
}
__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
- // CHECK-LABEL: test_mm512_mask_i32gather_epi64
- // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+ // CIR-LABEL: test_mm512_mask_i32gather_epi64
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.mask.gather.dpq.512"
+
+ // LLVM-LABEL: test_mm512_mask_i32gather_epi64
+ // LLVM: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
+
+ // OGCG-LABEL: test_mm512_mask_i32gather_epi64
+ // OGCG: call <8 x i64> @llvm.x86.avx512.mask.gather.dpq.512
return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
index 08b40ad033b24..accf1f60d7c32 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
@@ -2,6 +2,8 @@
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
#include <immintrin.h>
@@ -12,6 +14,9 @@ __m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __
// LLVM-LABEL: @test_mm_mmask_i64gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3div2.df
+
+ // OGCG-LABEL: @test_mm_mmask_i64gather_pd
+ // OGCG: @llvm.x86.avx512.mask.gather3div2.df
return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}
@@ -21,6 +26,9 @@ __m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i
// LLVM-LABEL: @test_mm_mmask_i64gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3div2.di
+
+ // OGCG-LABEL: @test_mm_mmask_i64gather_epi64
+ // OGCG: @llvm.x86.avx512.mask.gather3div2.di
return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
@@ -30,6 +38,9 @@ __m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i
// LLVM-LABEL: @test_mm256_mmask_i64gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3div4.df
+
+ // OGCG-LABEL: @test_mm256_mmask_i64gather_pd
+ // OGCG: @llvm.x86.avx512.mask.gather3div4.df
return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
}
@@ -39,6 +50,9 @@ __m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m2
// LLVM-LABEL: @test_mm256_mmask_i64gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3div4.di
+
+ // OGCG-LABEL: @test_mm256_mmask_i64gather_epi64
+ // OGCG: @llvm.x86.avx512.mask.gather3div4.di
return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
@@ -48,6 +62,9 @@ __m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __in
// LLVM-LABEL: @test_mm_mmask_i64gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3div4.sf
+
+ // OGCG-LABEL: @test_mm_mmask_i64gather_ps
+ // OGCG: @llvm.x86.avx512.mask.gather3div4.sf
return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}
@@ -57,6 +74,9 @@ __m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i
// LLVM-LABEL: @test_mm_mmask_i64gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3div4.si
+
+ // OGCG-LABEL: @test_mm_mmask_i64gather_epi32
+ // OGCG: @llvm.x86.avx512.mask.gather3div4.si
return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
@@ -66,6 +86,9 @@ __m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i _
// LLVM-LABEL: @test_mm256_mmask_i64gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3div8.sf
+
+ // OGCG-LABEL: @test_mm256_mmask_i64gather_ps
+ // OGCG: @llvm.x86.avx512.mask.gather3div8.sf
return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
}
@@ -75,6 +98,9 @@ __m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m2
// LLVM-LABEL: @test_mm256_mmask_i64gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3div8.si
+
+ // OGCG-LABEL: @test_mm256_mmask_i64gather_epi32
+ // OGCG: @llvm.x86.avx512.mask.gather3div8.si
return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
@@ -84,6 +110,9 @@ __m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __i
// LLVM-LABEL: @test_mm_mask_i32gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3siv2.df
+
+ // OGCG-LABEL: @test_mm_mask_i32gather_pd
+ // OGCG: @llvm.x86.avx512.mask.gather3siv2.df
return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}
@@ -93,6 +122,9 @@ __m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i
// LLVM-LABEL: @test_mm_mask_i32gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3siv2.di
+
+ // OGCG-LABEL: @test_mm_mask_i32gather_epi64
+ // OGCG: @llvm.x86.avx512.mask.gather3siv2.di
return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
@@ -102,6 +134,9 @@ __m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i
// LLVM-LABEL: @test_mm256_mask_i32gather_pd
// LLVM: @llvm.x86.avx512.mask.gather3siv4.df
+
+ // OGCG-LABEL: @test_mm256_mask_i32gather_pd
+ // OGCG: @llvm.x86.avx512.mask.gather3siv4.df
return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
}
@@ -111,6 +146,9 @@ __m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m12
// LLVM-LABEL: @test_mm256_mask_i32gather_epi64
// LLVM: @llvm.x86.avx512.mask.gather3siv4.di
+
+ // OGCG-LABEL: @test_mm256_mask_i32gather_epi64
+ // OGCG: @llvm.x86.avx512.mask.gather3siv4.di
return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
}
@@ -120,6 +158,9 @@ __m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __ind
// LLVM-LABEL: @test_mm_mask_i32gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3siv4.sf
+
+ // OGCG-LABEL: @test_mm_mask_i32gather_ps
+ // OGCG: @llvm.x86.avx512.mask.gather3siv4.sf
return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
}
@@ -129,6 +170,9 @@ __m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i
// LLVM-LABEL: @test_mm_mask_i32gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3siv4.si
+
+ // OGCG-LABEL: @test_mm_mask_i32gather_epi32
+ // OGCG: @llvm.x86.avx512.mask.gather3siv4.si
return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
@@ -138,6 +182,9 @@ __m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __
// LLVM-LABEL: @test_mm256_mask_i32gather_ps
// LLVM: @llvm.x86.avx512.mask.gather3siv8.sf
+
+ // OGCG-LABEL: @test_mm256_mask_i32gather_ps
+ // OGCG: @llvm.x86.avx512.mask.gather3siv8.sf
return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2);
}
@@ -147,5 +194,8 @@ __m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m25
// LLVM-LABEL: @test_mm256_mask_i32gather_epi32
// LLVM: @llvm.x86.avx512.mask.gather3siv8.si
+
+ // OGCG-LABEL: @test_mm256_mask_i32gather_epi32
+ // OGCG: @llvm.x86.avx512.mask.gather3siv8.si
return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
}
More information about the cfe-commits
mailing list