[clang] [CIR][X86] Add support for vpshl/vpshr builtins (PR #179538)

Wed Feb 4 09:20:22 PST 2026

https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/179538

>From 8704d692db00621dd46cd55914b6ba8d12c71f74 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Wed, 4 Feb 2026 16:47:58 +0000
Subject: [PATCH] Add support for vpshl/vpshr builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       |  19 +-
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  13 +-
 .../X86/avx512vbmi2-builtins.c                | 418 ++++++++++++++++++
 3 files changed, 444 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 580ada8901cbb..c7d6cd8ae240e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1246,8 +1246,23 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__builtin_elementwise_canonicalize:
   case Builtin::BI__builtin_elementwise_copysign:
   case Builtin::BI__builtin_elementwise_fma:
-  case Builtin::BI__builtin_elementwise_fshl:
-  case Builtin::BI__builtin_elementwise_fshr:
+    return errorBuiltinNYI(*this, e, builtinID);
+  case Builtin::BI__builtin_elementwise_fshl: {
+    mlir::Location loc = getLoc(e->getExprLoc());
+    mlir::Value a = emitScalarExpr(e->getArg(0));
+    mlir::Value b = emitScalarExpr(e->getArg(1));
+    mlir::Value c = emitScalarExpr(e->getArg(2));
+    return RValue::get(builder.emitIntrinsicCallOp(loc, "fshl", a.getType(),
+                                                   mlir::ValueRange{a, b, c}));
+  }
+  case Builtin::BI__builtin_elementwise_fshr: {
+    mlir::Location loc = getLoc(e->getExprLoc());
+    mlir::Value a = emitScalarExpr(e->getArg(0));
+    mlir::Value b = emitScalarExpr(e->getArg(1));
+    mlir::Value c = emitScalarExpr(e->getArg(2));
+    return RValue::get(builder.emitIntrinsicCallOp(loc, "fshr", a.getType(),
+                                                   mlir::ValueRange{a, b, c}));
+  }
   case Builtin::BI__builtin_elementwise_add_sat:
   case Builtin::BI__builtin_elementwise_sub_sat:
   case Builtin::BI__builtin_elementwise_max:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 80022998448ad..4e190ab4de3fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -2032,6 +2032,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_pternlogd256_maskz:
   case X86::BI__builtin_ia32_pternlogq128_maskz:
   case X86::BI__builtin_ia32_pternlogq256_maskz:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case X86::BI__builtin_ia32_vpshldd128:
   case X86::BI__builtin_ia32_vpshldd256:
   case X86::BI__builtin_ia32_vpshldd512:
@@ -2041,6 +2045,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_vpshldw128:
   case X86::BI__builtin_ia32_vpshldw256:
   case X86::BI__builtin_ia32_vpshldw512:
+    return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[0],
+                              ops[1], ops[2], false);
   case X86::BI__builtin_ia32_vpshrdd128:
   case X86::BI__builtin_ia32_vpshrdd256:
   case X86::BI__builtin_ia32_vpshrdd512:
@@ -2050,10 +2056,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_vpshrdw128:
   case X86::BI__builtin_ia32_vpshrdw256:
   case X86::BI__builtin_ia32_vpshrdw512:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return mlir::Value{};
+    // Ops 0 and 1 are swapped.
+    return emitX86FunnelShift(builder, getLoc(expr->getExprLoc()), ops[1],
+                              ops[0], ops[2], true);
   case X86::BI__builtin_ia32_reduce_fadd_pd512:
   case X86::BI__builtin_ia32_reduce_fadd_ps512:
   case X86::BI__builtin_ia32_reduce_fadd_ph512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c
new file mode 100644
index 0000000000000..ae0cdf09f3f77
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vbmi2-builtins.c
@@ -0,0 +1,418 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding -triple x86_64-unknown-linux-gnu -fclangir -target-feature +avx512vbmi2 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding -triple x86_64-unknown-linux-gnu -fclangir -target-feature +avx512vbmi2 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vbmi2 -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+
+#include <immintrin.h>
+
+__m512i test_mm512_mask_shldi_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shldi_epi64
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_mask_shldi_epi64
+  // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 47))
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_mask_shldi_epi64
+  // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47))
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_shldi_epi64(s, u, a, b, 47);
+}
+
+__m512i test_mm512_maskz_shldi_epi64(__mmask8 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shldi_epi64
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_maskz_shldi_epi64
+  // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 63))
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_maskz_shldi_epi64
+  // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63))
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_shldi_epi64(u, a, b, 63);
+}
+
+__m512i test_mm512_shldi_epi64(__m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shldi_epi64
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_shldi_epi64
+  // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 31))
+  // OGCG-LABEL: @test_mm512_shldi_epi64
+  // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
+  return _mm512_shldi_epi64(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldi_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shldi_epi32
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_mask_shldi_epi32
+  // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 7))
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_mask_shldi_epi32
+  // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7))
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_shldi_epi32(s, u, a, b, 7);
+}
+
+__m512i test_mm512_maskz_shldi_epi32(__mmask16 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shldi_epi32
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_maskz_shldi_epi32
+  // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 15))
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_maskz_shldi_epi32
+  // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15))
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_shldi_epi32(u, a, b, 15);
+}
+
+__m512i test_mm512_shldi_epi32(__m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shldi_epi32
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_shldi_epi32
+  // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 31))
+  // OGCG-LABEL: @test_mm512_shldi_epi32
+  // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
+  return _mm512_shldi_epi32(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldi_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shldi_epi16
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_mask_shldi_epi16
+  // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 3))
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_mask_shldi_epi16
+  // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3))
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_shldi_epi16(s, u, a, b, 3);
+}
+
+__m512i test_mm512_maskz_shldi_epi16(__mmask32 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shldi_epi16
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_maskz_shldi_epi16
+  // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 15))
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_maskz_shldi_epi16
+  // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_maskz_shldi_epi16(u, a, b, 15);
+}
+
+__m512i test_mm512_shldi_epi16(__m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shldi_epi16
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_shldi_epi16
+  // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 31))
+  // OGCG-LABEL: @test_mm512_shldi_epi16
+  // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31))
+  return _mm512_shldi_epi16(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldv_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shldv_epi64
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_mask_shldv_epi64
+  // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_mask_shldv_epi64
+  // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_shldv_epi64(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shldv_epi64(__mmask8 u, __m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shldv_epi64
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_maskz_shldv_epi64
+  // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_maskz_shldv_epi64
+  // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_shldv_epi64(u, s, a, b);
+}
+
+__m512i test_mm512_shldv_epi64(__m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shldv_epi64
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_shldv_epi64
+  // LLVM: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_shldv_epi64
+  // OGCG: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+  return _mm512_shldv_epi64(s, a, b);
+}
+
+__m512i test_mm512_shldv_epi32(__m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shldv_epi32
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_shldv_epi32
+  // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_shldv_epi32
+  // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+  return _mm512_shldv_epi32(s, a, b);
+}
+
+__m512i test_mm512_mask_shldv_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shldv_epi16
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_mask_shldv_epi16
+  // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_mask_shldv_epi16
+  // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_shldv_epi16(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shldv_epi16(__mmask32 u, __m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shldv_epi16
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_maskz_shldv_epi16
+  // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_maskz_shldv_epi16
+  // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_maskz_shldv_epi16(u, s, a, b);
+}
+
+__m512i test_mm512_shldv_epi16(__m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shldv_epi16
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_shldv_epi16
+  // LLVM: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_shldv_epi16
+  // OGCG: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+  return _mm512_shldv_epi16(s, a, b);
+}
+
+__m512i test_mm512_mask_shrdi_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shrdi_epi64
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_mask_shrdi_epi64
+  // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 47))
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_mask_shrdi_epi64
+  // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47))
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_shrdi_epi64(s, u, a, b, 47);
+}
+
+__m512i test_mm512_maskz_shrdi_epi64(__mmask8 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shrdi_epi64
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_maskz_shrdi_epi64
+  // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 63))
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_maskz_shrdi_epi64
+  // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63))
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_shrdi_epi64(u, a, b, 63);
+}
+
+__m512i test_mm512_shrdi_epi64(__m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shrdi_epi64
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_shrdi_epi64
+  // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64> splat (i64 31))
+  // OGCG-LABEL: @test_mm512_shrdi_epi64
+  // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
+  return _mm512_shrdi_epi64(a, b, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shrdi_epi32
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_mask_shrdi_epi32
+  // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 7))
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_mask_shrdi_epi32
+  // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7))
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_shrdi_epi32(s, u, a, b, 7);
+}
+
+__m512i test_mm512_maskz_shrdi_epi32(__mmask16 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shrdi_epi32
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_maskz_shrdi_epi32
+  // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 15))
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_maskz_shrdi_epi32
+  // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15))
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_shrdi_epi32(u, a, b, 15);
+}
+
+__m512i test_mm512_shrdi_epi32(__m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shrdi_epi32
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_shrdi_epi32
+  // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32> splat (i32 31))
+  // OGCG-LABEL: @test_mm512_shrdi_epi32
+  // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
+  return _mm512_shrdi_epi32(a, b, 31);
+}
+
+__m512i test_mm512_mask_shrdi_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shrdi_epi16
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_mask_shrdi_epi16
+  // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 3))
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_mask_shrdi_epi16
+  // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3))
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_shrdi_epi16(s, u, a, b, 3);
+}
+
+__m512i test_mm512_maskz_shrdi_epi16(__mmask32 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shrdi_epi16
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_maskz_shrdi_epi16
+  // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 15))
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_maskz_shrdi_epi16
+  // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_maskz_shrdi_epi16(u, a, b, 15);
+}
+
+__m512i test_mm512_shrdi_epi16(__m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shrdi_epi16
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_shrdi_epi16
+  // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16> splat (i16 31))
+  // OGCG-LABEL: @test_mm512_shrdi_epi16
+  // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31))
+  return _mm512_shrdi_epi16(a, b, 31);
+}
+
+__m512i test_mm512_mask_shldv_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shldv_epi32
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_mask_shldv_epi32
+  // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_mask_shldv_epi32
+  // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_shldv_epi32(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shldv_epi32(__mmask16 u, __m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shldv_epi32
+  // CIR: cir.call_llvm_intrinsic "fshl" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_maskz_shldv_epi32
+  // LLVM: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_maskz_shldv_epi32
+  // OGCG: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_shldv_epi32(u, s, a, b);
+}
+
+__m512i test_mm512_mask_shrdv_epi64(__m512i s, __mmask8 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shrdv_epi64
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_mask_shrdv_epi64
+  // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_mask_shrdv_epi64
+  // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_shrdv_epi64(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shrdv_epi64(__mmask8 u, __m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shrdv_epi64
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>, !cir.vector<8 x !u64i>) -> !cir.vector<8 x !s64i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+  // LLVM-LABEL: @test_mm512_maskz_shrdv_epi64
+  // LLVM: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // LLVM: select <8 x i1> {{.*}}, <8 x i64> {{.*}}, <8 x i64>
+  // OGCG-LABEL: @test_mm512_maskz_shrdv_epi64
+  // OGCG: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64>
+  // OGCG: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_shrdv_epi64(u, s, a, b);
+}
+
+__m512i test_mm512_mask_shrdv_epi32(__m512i s, __mmask16 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shrdv_epi32
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_mask_shrdv_epi32
+  // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_mask_shrdv_epi32
+  // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_shrdv_epi32(s, u, a, b);
+}
+
+__m512i test_mm512_maskz_shrdv_epi32(__mmask16 u, __m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_maskz_shrdv_epi32
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_maskz_shrdv_epi32
+  // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // LLVM: select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_maskz_shrdv_epi32
+  // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+  // OGCG: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_shrdv_epi32(u, s, a, b);
+}
+
+__m512i test_mm512_shrdv_epi32(__m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shrdv_epi32
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>, !cir.vector<16 x !u32i>) -> !cir.vector<16 x !s32i>
+  // LLVM-LABEL: @test_mm512_shrdv_epi32
+  // LLVM: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> {{.*}}, <16 x i32> {{.*}}, <16 x i32>
+  // OGCG-LABEL: @test_mm512_shrdv_epi32
+  // OGCG: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32>
+  return _mm512_shrdv_epi32(s, a, b);
+}
+
+__m512i test_mm512_mask_shrdv_epi16(__m512i s, __mmask32 u, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_mask_shrdv_epi16
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // CIR: cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_mask_shrdv_epi16
+  // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // LLVM: select <32 x i1> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_mask_shrdv_epi16
+  // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+  // OGCG: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_shrdv_epi16(s, u, a, b);
+}
+
+__m512i test_mm512_shrdv_epi16(__m512i s, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_mm512_shrdv_epi16
+  // CIR: cir.call_llvm_intrinsic "fshr" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<32 x !s16i>, !cir.vector<32 x !s16i>, !cir.vector<32 x !u16i>) -> !cir.vector<32 x !s16i>
+  // LLVM-LABEL: @test_mm512_shrdv_epi16
+  // LLVM: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> {{.*}}, <32 x i16> {{.*}}, <32 x i16>
+  // OGCG-LABEL: @test_mm512_shrdv_epi16
+  // OGCG: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
+  return _mm512_shrdv_epi16(s, a, b);
+}