[clang] 2b4d818 - [Clang][LLVM][SVE2.1] Created intrinsics for DUPQ instr. (#83260)

Fri Mar 8 07:35:52 PST 2024

Author: Lukacma
Date: 2024-03-08T15:35:48Z
New Revision: 2b4d8188b263019477fa996b74b3da8a87a0e04b

URL: https://github.com/llvm/llvm-project/commit/2b4d8188b263019477fa996b74b3da8a87a0e04b
DIFF: https://github.com/llvm/llvm-project/commit/2b4d8188b263019477fa996b74b3da8a87a0e04b.diff

LOG: [Clang][LLVM][SVE2.1]  Created intrinsics for DUPQ instr. (#83260)

This patch adds clang and llvm support for following intrinsic and maps
it to DUPQ instruction:
```
   // Variants are also available for:
   // _s8, _u16, _s16, _u32, _s32, _u64, _s64
   // _bf16, _f16, _f32, _f64
   svuint8_t svdup_laneq[_u8](svuint8_t zn, uint64_t imm_idx);
```

Added: 
    clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll

Modified: 
    clang/include/clang/Basic/arm_sve.td
    clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 6da30e08e7521e..6cc249837d3f3d 100644

--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2215,6 +2215,15 @@ let TargetGuard = "sve2p1" in {
   def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
   // EXTQ
   def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>;
+  // DUPQ
+  def SVDUP_LANEQ_B  : SInst<"svdup_laneq[_{d}]", "ddi",  "cUc", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_15>]>;
+  def SVDUP_LANEQ_H  : SInst<"svdup_laneq[_{d}]", "ddi",  "sUsh", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>;
+  def SVDUP_LANEQ_S  : SInst<"svdup_laneq[_{d}]", "ddi",  "iUif", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+  def SVDUP_LANEQ_D  : SInst<"svdup_laneq[_{d}]", "ddi",  "lUld", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
+
+  let TargetGuard = "bf16" in {
+    def SVDUP_LANEQ_BF16  : SInst<"svdup_laneq[_{d}]", "ddi",  "b", MergeNone, "aarch64_sve_dup_laneq", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_7>]>;
+  }
   // PMOV
   // Move to Pred
   multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {

diff  --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c
new file mode 100644
index 00000000000000..587a67aa6b7ca2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dupq.c
@@ -0,0 +1,213 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16\
+// RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED) A1
+#else
+#define SVE_ACLE_FUNC(A1, A2) A1##A2
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_s8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_s8u10__SVInt8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svdup_laneq_s8(svint8_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _s8)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svdup_laneq_u8
+// CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svdup_laneq_u8u11__SVUint8_t
+// CPP-CHECK-SAME: (<vscale x 16 x i8> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> [[ZN]], i32 15)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svdup_laneq_u8(svuint8_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _u8)(zn, 15);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_s16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_s16u11__SVInt16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svdup_laneq_s16(svint16_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _s16)(zn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x i16> @test_svdup_laneq_u16
+// CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x i16> @_Z20test_svdup_laneq_u16u12__SVUint16_t
+// CPP-CHECK-SAME: (<vscale x 8 x i16> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> [[ZN]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svdup_laneq_u16(svuint16_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _u16)(zn, 7);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_s32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_s32u11__SVInt32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svdup_laneq_s32(svint32_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _s32)(zn, 2);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svdup_laneq_u32
+// CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z20test_svdup_laneq_u32u12__SVUint32_t
+// CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> [[ZN]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svdup_laneq_u32(svuint32_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _u32)(zn, 3);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_s64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_s64u11__SVInt64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svdup_laneq_s64(svint64_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _s64)(zn, 0);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svdup_laneq_u64
+// CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z20test_svdup_laneq_u64u12__SVUint64_t
+// CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> [[ZN]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svdup_laneq_u64(svuint64_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _u64)(zn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svdup_laneq_f16
+// CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svdup_laneq_f16u13__SVFloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> [[ZN]], i32 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svdup_laneq_f16(svfloat16_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _f16)(zn, 4);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdup_laneq_f32
+// CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svdup_laneq_f32u13__SVFloat32_t
+// CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> [[ZN]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svdup_laneq_f32(svfloat32_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _f32)(zn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svdup_laneq_f64
+// CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z20test_svdup_laneq_f64u13__SVFloat64_t
+// CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> [[ZN]], i32 1)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svdup_laneq_f64(svfloat64_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _f64)(zn, 1);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svdup_laneq_bf16
+// CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z21test_svdup_laneq_bf16u14__SVBfloat16_t
+// CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svdup_laneq_bf16(svbfloat16_t zn) {
+    return SVE_ACLE_FUNC(svdup_laneq, _bf16)(zn, 3);
+}

diff  --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index d857608d441557..a6ec5150f0aabb 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -188,3 +188,32 @@ void test_svget_svset_b(uint64_t idx, svboolx2_t tuple2, svboolx4_t tuple4, svbo
   svget2_b(tuple2, idx); // expected-error {{argument to 'svget2_b' must be a constant integer}}
   svget4_b(tuple4, idx); // expected-error {{argument to 'svget4_b' must be a constant integer}}
 }
+
+__attribute__((target("+sve2p1")))
+void test_svdup_laneq(){  
+  svuint8_t zn_u8;
+  svuint16_t zn_u16;
+  svuint32_t zn_u32;
+  svuint64_t zn_u64;
+  svint8_t zn_s8;
+  svint16_t zn_s16;
+  svint32_t zn_s32;
+  svint64_t zn_s64;
+  svfloat16_t zn_f16;
+  svfloat32_t zn_f32;
+  svfloat64_t zn_f64;
+  svbfloat16_t zn_bf16;
+
+  svdup_laneq_u8(zn_u8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+  svdup_laneq_u16(zn_u16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+  svdup_laneq_u32(zn_u32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svdup_laneq_u64(zn_u64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svdup_laneq_s8(zn_s8,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 15]}}
+  svdup_laneq_s16(zn_s16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+  svdup_laneq_s32(zn_s32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svdup_laneq_s64(zn_s64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svdup_laneq_f16(zn_f16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+  svdup_laneq_f32(zn_f32,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+  svdup_laneq_f64(zn_f64,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}}
+  svdup_laneq_bf16(zn_bf16,-1); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+}
\ No newline at end of file

diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 5a9a7c4b43a1ff..bcaa37de74b630 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1360,6 +1360,12 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                  LLVMSubdivide2VectorType<0>,
                  llvm_i32_ty],
                 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+  
+  class SVE2_1VectorArgIndexed_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<1>>]>;
 
   class AdvSIMD_SVE_CDOT_LANE_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1913,6 +1919,7 @@ def int_aarch64_sve_clastb    : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_clastb_n  : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
 def int_aarch64_sve_compact   : AdvSIMD_Pred1VectorArg_Intrinsic;
 def int_aarch64_sve_dupq_lane : AdvSIMD_SVE_DUPQ_Intrinsic;
+def int_aarch64_sve_dup_laneq : SVE2_1VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_ext       : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_sel       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_lasta     : AdvSIMD_SVE_Reduce_Intrinsic;

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7c98f934a1317e..e0a010af415534 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4101,7 +4101,7 @@ defm FMINNMQV : sve2p1_fp_reduction_q<0b101, "fminnmqv", int_aarch64_sve_fminnmq
 defm FMAXQV   : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
 defm FMINQV   : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
 
-defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
+defm DUPQ_ZZI : sve2p1_dupq<"dupq", int_aarch64_sve_dup_laneq>;
 defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
 
 defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;

diff  --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 8baf3a6d3d818b..c19e02bb03d1f9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -9893,23 +9893,33 @@ class sve2p1_dupq<bits<5> ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp
   let hasSideEffects = 0;
 }
 
-multiclass sve2p1_dupq<string mnemonic> {
-  def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b> {
+multiclass sve2p1_dupq<string mnemonic, SDPatternOperator Op> {
+  def _B : sve2p1_dupq<{?, ?, ?, ?, 1}, mnemonic, ZPR8, VectorIndexB32b_timm> {
     bits<4> index;
     let Inst{20-17} = index;
   }
-  def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b> {
+  def _H : sve2p1_dupq<{?, ?, ?, 1, 0}, mnemonic, ZPR16, VectorIndexH32b_timm> {
     bits<3> index;
     let Inst{20-18} = index;
   }
-  def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b> {
+  def _S : sve2p1_dupq<{?, ?, 1, 0, 0}, mnemonic, ZPR32, VectorIndexS32b_timm> {
     bits<2> index;
     let Inst{20-19} = index;
   }
-  def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b> {
+  def _D : sve2p1_dupq<{?, 1, 0, 0, 0}, mnemonic, ZPR64, VectorIndexD32b_timm> {
     bits<1> index;
     let Inst{20} = index;
   }
+
+  def : SVE_2_Op_Imm_Pat<nxv16i8, Op, nxv16i8, i32, VectorIndexB32b_timm, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Imm_Pat<nxv8i16, Op, nxv8i16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Imm_Pat<nxv4i32, Op, nxv4i32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Imm_Pat<nxv2i64, Op, nxv2i64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_2_Op_Imm_Pat<nxv8f16, Op, nxv8f16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Imm_Pat<nxv4f32, Op, nxv4f32, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Imm_Pat<nxv2f64, Op, nxv2f64, i32, VectorIndexD32b_timm, !cast<Instruction>(NAME # _D)>;
+  def : SVE_2_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, i32, VectorIndexH32b_timm, !cast<Instruction>(NAME # _H)>;
 }
 
 

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll
new file mode 100644
index 00000000000000..f1a423b02ac2ad
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-dupq.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
+
+define <vscale x 16 x i8> @test_dupq_i8 (<vscale x 16 x i8> %zn) {
+; CHECK-LABEL: test_dupq_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.b, z0.b[15]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8> %zn, i32 15)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @test_dupq_i16 (<vscale x 8 x i16> %zn) {
+; CHECK-LABEL: test_dupq_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.h, z0.h[7]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16> %zn, i32 7)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @test_dupq__i32 (<vscale x 4 x i32> %zn) {
+; CHECK-LABEL: test_dupq__i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.s, z0.s[3]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32> %zn, i32 3)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @test_dupq_i64 (<vscale x 2 x i64> %zn) {
+; CHECK-LABEL: test_dupq_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.d, z0.d[1]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64> %zn, i32 1)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @test_dupq_f16(<vscale x 8 x half> %zn) {
+; CHECK-LABEL: test_dupq_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.h, z0.h[4]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half> %zn, i32 4)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @test_dupq_f32(<vscale x 4 x float> %zn) {
+; CHECK-LABEL: test_dupq_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.s, z0.s[2]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float> %zn, i32 2)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_dupq_f64(<vscale x 2 x double> %zn) {
+; CHECK-LABEL: test_dupq_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.d, z0.d[0]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double> %zn, i32 0)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x bfloat> @test_dupq_bf16(<vscale x 8 x bfloat> %zn) {
+; CHECK-LABEL: test_dupq_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dupq z0.h, z0.h[1]
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat> %zn, i32 1)
+  ret <vscale x 8 x bfloat> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.laneq.nxv16i8(<vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.laneq.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.laneq.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.laneq.nxv2i64(<vscale x 2 x i64>, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sve.dup.laneq.nxv8f16(<vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.dup.laneq.nxv4f32(<vscale x 4 x float>, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sve.dup.laneq.nxv2f64(<vscale x 2 x double>, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.laneq.nxv8bf16(<vscale x 8 x bfloat>, i32)