[clang] [CIR] Upstream gather intrinsics (PR #169157)
Jasmine Tang via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 22 01:43:09 PST 2025
https://github.com/badumbatish created https://github.com/llvm/llvm-project/pull/169157
None
>From 09aaf71c66fc7262554fe197cdd2dd2764fa5ee3 Mon Sep 17 00:00:00 2001
From: Jasmine Tang <jjasmine at igalia.com>
Date: Wed, 19 Nov 2025 06:16:32 -0800
Subject: [PATCH] Gather implementation
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 109 +++++++++++++++++-
clang/test/CIR/CodeGen/X86/avx512f-builtins.c | 96 +++++++++++++++
2 files changed, 204 insertions(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 978fee7dbec9d..5eb20a2437ae5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -68,6 +68,27 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
return bitCast;
}
+// Bitcast an integer mask to a vector of i1 with one element per mask bit.
+static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask,
+ unsigned numElts, mlir::Location loc) {
+ cir::VectorType maskTy =
+ cir::VectorType::get(cgf.getBuilder().getSIntNTy(1),
+ cast<cir::IntType>(mask.getType()).getWidth());
+
+ mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy);
+
+ // With fewer than 8 elements the starting mask was an i8, so shuffle out
+ // only elements [0, numElts) of the bitcast vector to get the right size.
+ if (numElts < 8) {
+ llvm::SmallVector<int64_t, 4> indices;
+ for (unsigned i = 0; i != numElts; ++i)
+ indices.push_back(i);
+ maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, indices);
+ }
+
+ return maskVec;
+}
+
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -456,7 +477,93 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_gathersiv8di:
case X86::BI__builtin_ia32_gathersiv16si:
case X86::BI__builtin_ia32_gatherdiv8di:
- case X86::BI__builtin_ia32_gatherdiv16si:
+ case X86::BI__builtin_ia32_gatherdiv16si: {
+ StringRef intrinsicName;
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_gather3div2df:
+ intrinsicName = "x86.avx512.mask.gather3div2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div2di:
+ intrinsicName = "x86.avx512.mask.gather3div2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4df:
+ intrinsicName = "x86.avx512.mask.gather3div4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3div4di:
+ intrinsicName = "x86.avx512.mask.gather3div4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3div4sf:
+ intrinsicName = "x86.avx512.mask.gather3div4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div4si:
+ intrinsicName = "x86.avx512.mask.gather3div4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3div8sf:
+ intrinsicName = "x86.avx512.mask.gather3div8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3div8si:
+ intrinsicName = "x86.avx512.mask.gather3div8.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2df:
+ intrinsicName = "x86.avx512.mask.gather3siv2.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv2di:
+ intrinsicName = "x86.avx512.mask.gather3siv2.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4df:
+ intrinsicName = "x86.avx512.mask.gather3siv4.df";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4di:
+ intrinsicName = "x86.avx512.mask.gather3siv4.di";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4sf:
+ intrinsicName = "x86.avx512.mask.gather3siv4.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv4si:
+ intrinsicName = "x86.avx512.mask.gather3siv4.si";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8sf:
+ intrinsicName = "x86.avx512.mask.gather3siv8.sf";
+ break;
+ case X86::BI__builtin_ia32_gather3siv8si:
+ intrinsicName = "x86.avx512.mask.gather3siv8.si";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8df:
+ intrinsicName = "x86.avx512.mask.gather.dpd.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.dps.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8df:
+ intrinsicName = "x86.avx512.mask.gather.qpd.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16sf:
+ intrinsicName = "x86.avx512.mask.gather.qps.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv8di:
+ intrinsicName = "x86.avx512.mask.gather.dpq.512";
+ break;
+ case X86::BI__builtin_ia32_gathersiv16si:
+ intrinsicName = "x86.avx512.mask.gather.dpi.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv8di:
+ intrinsicName = "x86.avx512.mask.gather.qpq.512";
+ break;
+ case X86::BI__builtin_ia32_gatherdiv16si:
+ intrinsicName = "x86.avx512.mask.gather.qpi.512";
+ break;
+ }
+
+ unsigned minElts =
+ std::min(cast<cir::VectorType>(ops[0].getType()).getSize(),
+ cast<cir::VectorType>(ops[2].getType()).getSize());
+ ops[3] =
+ getMaskVecValue(*this, ops[3], minElts, getLoc(expr->getExprLoc()));
+ return emitIntrinsicCallOp(*this, expr, intrinsicName.str(),
+ convertType(expr->getType()), ops);
+ }
case X86::BI__builtin_ia32_scattersiv8df:
case X86::BI__builtin_ia32_scattersiv16sf:
case X86::BI__builtin_ia32_scatterdiv8df:
diff --git a/clang/test/CIR/CodeGen/X86/avx512f-builtins.c b/clang/test/CIR/CodeGen/X86/avx512f-builtins.c
index dc54a87856a7c..e95e5f95e3513 100644
--- a/clang/test/CIR/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/avx512f-builtins.c
@@ -77,3 +77,99 @@ __m512i test_mm512_undefined_epi32(void) {
// OGCG: ret <8 x i64> zeroinitializer
return _mm512_undefined_epi32();
}
+
+__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.qps.512
+ return _mm512_i64gather_ps(__index, __addr, 2);
+}
+
+__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.qps.512
+ return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
+ return _mm512_i64gather_epi32(__index, __addr, 2);
+}
+
+__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.qpi.512
+ return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
+ return _mm512_i64gather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.qpd.512
+ return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i64gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
+ return _mm512_i64gather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i64gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.qpq.512
+ return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.dps.512
+ return _mm512_i32gather_ps(__index, __addr, 2);
+}
+
+__m512 test_mm512_mask_i32gather_ps(__m512 v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_ps
+ // CHECK: @llvm.x86.avx512.mask.gather.dps.512
+ return _mm512_mask_i32gather_ps(v1_old, __mask, __index, __addr, 2);
+}
+
+__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
+ return _mm512_i32gather_epi32(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_epi32
+ // CHECK: @llvm.x86.avx512.mask.gather.dpi.512
+ return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+ return _mm512_i32gather_pd(__index, __addr, 2);
+}
+
+__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_pd
+ // CHECK: @llvm.x86.avx512.mask.gather.dpd.512
+ return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2);
+}
+
+__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_i32gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+ return _mm512_i32gather_epi64(__index, __addr, 2);
+}
+
+__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+ // CHECK-LABEL: test_mm512_mask_i32gather_epi64
+ // CHECK: @llvm.x86.avx512.mask.gather.dpq.512
+ return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2);
+}
More information about the cfe-commits
mailing list