[clang] [CIR][X86] Add support for permd builtins (PR #172132)
Zhihui Yang via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 12 22:23:54 PST 2025
https://github.com/YGGkk created https://github.com/llvm/llvm-project/pull/172132
Part of https://github.com/llvm/llvm-project/issues/167765
>From 7bb370ddc58d5364fa1c636f3c29a40e54a943e5 Mon Sep 17 00:00:00 2001
From: Zhihui Yang <youngwisdm at gmail.com>
Date: Fri, 12 Dec 2025 22:21:52 -0800
Subject: [PATCH] [CIR][X86] Add support for permd builtins
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 17 ++++-
.../CIR/CodeGenBuiltins/X86/permd-builtins.c | 62 +++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..a859210235852 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1210,7 +1210,22 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_permdi256:
case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi512:
- case X86::BI__builtin_ia32_permdf512:
+ case X86::BI__builtin_ia32_permdf512: {
+ unsigned imm = ops[1].getDefiningOp<cir::ConstantOp>()
+ .getIntValue()
+ .getZExtValue();
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+ // These intrinsics operate on 256-bit lanes of four 64-bit elements.
+ int64_t Indices[8];
+
+ for (unsigned l = 0; l != numElts; l += 4)
+ for (unsigned i = 0; i != 4; ++i)
+ Indices[l + i] = l + ((imm >> (2 * i)) & 0x3);
+
+ return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0],
+ ArrayRef(Indices, numElts));
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c
new file mode 100644
index 0000000000000..ae1efc32d6db9
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+__m256i test__builtin_ia32_permdi256() // Expects the 4 x i64 permute builtin with a constant selector to be emitted as a vector shuffle.
+{
+ // CIR-LABEL: test__builtin_ia32_permdi256
+ // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<4 x !s64i>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !s64i>
+ // LLVM-LABEL: test__builtin_ia32_permdi256
+ // LLVM: shufflevector <4 x i64> {{%.*}}, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ // OGCG-LABEL: test__builtin_ia32_permdi256
+ // OGCG: shufflevector <4 x i64> {{%.*}}, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ __v4di vec = {0, 1, 2, 3}; // Source operand; the checks match it with a wildcard and only pin the shuffle mask.
+ return __builtin_ia32_permdi256(vec, 1); // Selector 1 splits into 2-bit fields 1,0,0,0: result element 0 reads source element 1, the rest read element 0.
+}
+
+__m512i test__builtin_ia32_permdi512() // Expects the 8 x i64 permute builtin with a constant selector to be emitted as a vector shuffle.
+{
+ // CIR-LABEL: test__builtin_ia32_permdi512
+ // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<8 x !s64i>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<8 x !s64i>
+ // LLVM-LABEL: test__builtin_ia32_permdi512
+ // LLVM: shufflevector <8 x i64> {{%.*}}, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+ // OGCG-LABEL: test__builtin_ia32_permdi512
+ // OGCG: shufflevector <8 x i64> {{%.*}}, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+ __v8di vec = {0, 1, 2, 3, 4, 5, 6, 7}; // Source operand; the checks match it with a wildcard and only pin the shuffle mask.
+ return __builtin_ia32_permdi512(vec, 1); // Selector 1 gives 1,0,0,0 for the first four elements; the same pattern is reused for the next four with base 4, i.e. 5,4,4,4.
+}
+
+__m256d test__builtin_ia32_permdf256() // Expects the 4 x double permute builtin with a constant selector to be emitted as a vector shuffle.
+{
+ // CIR-LABEL: test__builtin_ia32_permdf256
+ // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<4 x !cir.double>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !cir.double>
+ // LLVM-LABEL: test__builtin_ia32_permdf256
+ // LLVM: shufflevector <4 x double> {{%.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ // OGCG-LABEL: test__builtin_ia32_permdf256
+ // OGCG: shufflevector <4 x double> {{%.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ __v4df vec = {0, 1, 2, 3}; // Source operand; the checks match it with a wildcard and only pin the shuffle mask.
+ return __builtin_ia32_permdf256(vec, 1); // Selector 1 splits into 2-bit fields 1,0,0,0: result element 0 reads source element 1, the rest read element 0.
+}
+
+__m512d test__builtin_ia32_permdf512() // Expects the 8 x double permute builtin with a constant selector to be emitted as a vector shuffle.
+{
+ // CIR-LABEL: test__builtin_ia32_permdf512
+ // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<8 x !cir.double>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<8 x !cir.double>
+ // LLVM-LABEL: test__builtin_ia32_permdf512
+ // LLVM: shufflevector <8 x double> {{%.*}}, <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+ // OGCG-LABEL: test__builtin_ia32_permdf512
+ // OGCG: shufflevector <8 x double> {{%.*}}, <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+ __v8df vec = {0, 1, 2, 3, 4, 5, 6, 7}; // Source operand; the checks match it with a wildcard and only pin the shuffle mask.
+ return __builtin_ia32_permdf512(vec, 1); // Selector 1 gives 1,0,0,0 for the first four elements; the same pattern is reused for the next four with base 4, i.e. 5,4,4,4.
+}
\ No newline at end of file
More information about the cfe-commits
mailing list