[clang] [llvm] [AArch64][SVE] Change the immediate argument in svextq (PR #115340)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 7 08:38:10 PST 2024


https://github.com/SpencerAbson created https://github.com/llvm/llvm-project/pull/115340

In order to align with `svext` and NEON `vext`/`vextq`, this patch changes immediate argument in `svextq` such that it refers to elements of the size of those of the source vector, rather than bytes. The [spec for this intrinsic](https://github.com/ARM-software/acle/blob/main/main/acle.md#extq) is ambiguous about the meaning of this argument, this issue was raised after there was a differing interpretation for it from the implementers of the ACLE in GCC.

For example (with our current implementation):

`svextq_f64(zn_f64, zm_f64, 1)` would, for each 128-bit segment of `zn_f64,` concatenate the highest 15 bytes of this segment with the first byte of the corresponding segment of `zm_f64`.

After this patch, the behavior of `svextq_f64(zn_f64, zm_f64, 1)` would be, for each 128-bit vector segment of `zn_f64`, to concatenate the higher doubleword of this segment with the lower doubleword of the corresponding segment of `zm_f64`.

The range of the immediate argument in `svextq` would be modified such that it is:
- [0,15] for `svextq_{s8,u8}`
- [0,7] for `svextq_{s16,u16,f16,bf16}`
- [0,3] for `svextq_{s32,u32,f32}`
- [0,1] for `svextq_{s64,u64,f64}`

>From e5212d805bcd7db193255f02ea577420b9536e38 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Thu, 7 Nov 2024 11:44:27 +0000
Subject: [PATCH] [AArch64][SVE] Fix the immediate argument in svextq

The meaning of the immediate argument in svextq should be tied to the element size of its operands.

For example:

svextq_f64(zn_f64, zm_f64, 1) would, for each 128-bit segment of zn_f64, concatenate the highest 15 bytes of this segment with
the first byte of the corresponding segment of zm_f64.

The intuitive behavior of svextq_f64(zn_f64, zm_f64, 1) is to concatenate the higher doubleword of zn_f64 with the lower doubleword of zm_f64.

The range of the immediate argument in svextq has been modified such that it is:
- [0,15] for svextq_{s8,u8}
- [0,7] for svextq_{s16,u16,f16,bf16}
- [0,3] for svextq_{s32,u32,f32}
- [0,1] for svextq_{s64,u64,f64}
---
 clang/include/clang/Basic/arm_sve.td          |  2 +-
 .../acle_sve2p1_extq.c                        | 42 +++++++++---------
 .../acle_sve2p1_imm.cpp                       | 44 +++++++++++++++++--
 .../lib/Target/AArch64/AArch64InstrFormats.td | 33 ++++++++++++++
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 17 ++++---
 .../CodeGen/AArch64/sve2p1-intrinsics-extq.ll | 28 ++++++------
 6 files changed, 118 insertions(+), 48 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 90b1ec242e6bae..f4f87323eef09f 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2284,7 +2284,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
   def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tblq">;
   def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
   // EXTQ
-  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>;
+  def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
 
   // PMOV
   // Move to Pred
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
index 5fbfa881500ba1..06eec1e00900cc 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_extq.c
@@ -103,111 +103,111 @@ svuint32_t test_svextq_u32(svuint32_t zn, svuint32_t zm) {
 // CHECK-LABEL: define dso_local <vscale x 4 x i32> @test_svextq_s32
 // CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 3)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 4 x i32> @_Z15test_svextq_s32u11__SVInt32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x i32> [[ZN:%.*]], <vscale x 4 x i32> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 6)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> [[ZN]], <vscale x 4 x i32> [[ZM]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svextq_s32(svint32_t zn, svint32_t zm) {
-    return SVE_ACLE_FUNC(svextq, _s32,,)(zn, zm, 6);
+    return SVE_ACLE_FUNC(svextq, _s32,,)(zn, zm, 3);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_u64
 // CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 1)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svextq_u64u12__SVUint64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 3)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svextq_u64(svuint64_t zn, svuint64_t zm) {
-  return SVE_ACLE_FUNC(svextq, _u64,,)(zn, zm, 3);
+  return SVE_ACLE_FUNC(svextq, _u64,,)(zn, zm, 1);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x i64> @test_svextq_s64
 // CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 2 x i64> @_Z15test_svextq_s64u11__SVInt64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x i64> [[ZN:%.*]], <vscale x 2 x i64> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 7)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> [[ZN]], <vscale x 2 x i64> [[ZM]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svextq_s64(svint64_t zn, svint64_t zm) {
-    return SVE_ACLE_FUNC(svextq, _s64,,)(zn, zm, 7);
+    return SVE_ACLE_FUNC(svextq, _s64,,)(zn, zm, 0);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svextq_f16
 // CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 7)
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 8 x half> @_Z15test_svextq_f16u13__SVFloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 8)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svextq_f16(svfloat16_t zn, svfloat16_t zm) {
-    return SVE_ACLE_FUNC(svextq, _f16,,)(zn, zm, 8);
+    return SVE_ACLE_FUNC(svextq, _f16,,)(zn, zm, 7);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svextq_f32
 // CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 2)
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 4 x float> @_Z15test_svextq_f32u13__SVFloat32_tS_
 // CPP-CHECK-SAME: (<vscale x 4 x float> [[ZN:%.*]], <vscale x 4 x float> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 9)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svextq_f32(svfloat32_t zn, svfloat32_t zm) {
-    return SVE_ACLE_FUNC(svextq, _f32,,)(zn, zm, 9);
+    return SVE_ACLE_FUNC(svextq, _f32,,)(zn, zm, 2);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 2 x double> @test_svextq_f64
 // CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 0)
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 2 x double> @_Z15test_svextq_f64u13__SVFloat64_tS_
 // CPP-CHECK-SAME: (<vscale x 2 x double> [[ZN:%.*]], <vscale x 2 x double> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 10)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svextq_f64(svfloat64_t zn, svfloat64_t zm) {
-    return SVE_ACLE_FUNC(svextq, _f64,,)(zn, zm, 10);
+    return SVE_ACLE_FUNC(svextq, _f64,,)(zn, zm, 0);
 }
 
 // CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @test_svextq_bf16
 // CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 6)
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 // CPP-CHECK-LABEL: define dso_local <vscale x 8 x bfloat> @_Z16test_svextq_bf16u14__SVBfloat16_tS_
 // CPP-CHECK-SAME: (<vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) #[[ATTR0]] {
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 11)
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]], i32 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svextq_bf16(svbfloat16_t zn, svbfloat16_t zm) {
-    return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 11);
+    return SVE_ACLE_FUNC(svextq, _bf16,,)(zn, zm, 6);
 }
diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index f7047c99e884e9..ac7586e202b96c 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -138,9 +138,47 @@ void test_svbfmul_lane(svbfloat16_t zn, svbfloat16_t zm, uint64_t idx){
 }
 
 __attribute__((target("+sve2p1")))
-void test_svextq_lane(svint16_t zn_i16, svint16_t zm_i16, svfloat16_t zn_f16, svfloat16_t zm_f16){
-  svextq_s16(zn_i16, zm_i16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
-  svextq_f16(zn_f16, zm_f16, 16);  // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+void test_svextq_8b_offset(svint8_t s8, svuint8_t u8){
+  svextq_s8(s8, s8, -1);  // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  svextq_u8(u8, u8, -1);  // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+
+  svextq_s8(s8, s8, 16);   // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  svextq_u8(u8, u8, 16);   // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+}
+
+__attribute__((target("+sve2p1")))
+void test_svextq_16b_offset(svint16_t s16, svuint16_t u16, svfloat16_t f16, svbfloat16_t bf16){
+  svextq_s16(s16, s16, -1);     // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  svextq_u16(u16, u16, -1);     // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  svextq_f16(f16, f16, -1);     // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  svextq_bf16(bf16, bf16, -1);  // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+
+  svextq_s16(s16, s16, 8);     // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  svextq_u16(u16, u16, 8);     // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  svextq_f16(f16, f16, 8);     // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  svextq_bf16(bf16, bf16, 8);  // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
+
+__attribute__((target("+sve2p1")))
+void test_svextq_32b_offset(svint32_t s32, svuint32_t u32, svfloat32_t f32){
+  svextq_s32(s32, s32, -1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  svextq_u32(u32, u32, -1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  svextq_f32(f32, f32, -1);   // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+
+  svextq_s32(s32, s32, 4);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svextq_u32(u32, u32, 4);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svextq_f32(f32, f32, 4);   // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}
+
+__attribute__((target("+sve2p1")))
+void test_svextq_64b_offset(svint64_t s64, svuint64_t u64, svfloat64_t f64){
+  svextq_s64(s64, s64, -1);   // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  svextq_u64(u64, u64, -1);   // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  svextq_f64(f64, f64, -1);   // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+
+  svextq_s64(s64, s64, 2);   // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svextq_u64(u64, u64, 2);   // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+  svextq_f64(f64, f64, 2);   // expected-error {{argument value 2 is outside the valid range [0, 1]}}
 }
 
 __attribute__((target("+sve2p1")))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index e44caef686be29..6f52a25cdd71a3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -529,6 +529,18 @@ def UImmS8XForm : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i64);
 }]>;
 
+def UImmM2XForm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i32);
+}]>;
+
+def UImmM4XForm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i32);
+}]>;
+
+def UImmM8XForm : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() * 8, SDLoc(N), MVT::i32);
+}]>;
+
 // uimm5sN predicate - True if the immediate is a multiple of N in the range
 // [0 * N, 32 * N].
 def UImm5s2Operand : UImmScaledMemoryIndexed<5, 2>;
@@ -1098,6 +1110,13 @@ def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_1Operand;
 }
 
+// extq_timm32_0_1m8 - True if the 32-bit immediate is in the range [0,1], scale this immediate
+// by a factor of 8 after a match is made.
+def extq_timm32_0_1m8 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 2;}], UImmM8XForm> {
+  let ParserMatchClass = Imm0_15Operand;
+}
+
 // timm32_1_1 - True if the 32-bit immediate is in the range [1,1]
 def timm32_1_1 : Operand<i32>, TImmLeaf<i32, [{
     return ((uint32_t)Imm) == 1;
@@ -1140,6 +1159,13 @@ def timm32_0_3 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_3Operand;
 }
 
+// extq_timm32_0_3m4 - True if the 32-bit immediate is in the range [0,3], scale this immediate
+// by a factor of 4 after a match is made.
+def extq_timm32_0_3m4 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 4;}], UImmM4XForm> {
+  let ParserMatchClass = Imm0_15Operand;
+}
+
 // timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
 def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) < 8;
@@ -1147,6 +1173,13 @@ def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
   let ParserMatchClass = Imm0_7Operand;
 }
 
+// extq_timm32_0_7m2 - True if the 32-bit immediate is in the range [0,7], scale this immediate
+// by a factor of 2 after a match is made.
+def extq_timm32_0_7m2 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 8;}], UImmM2XForm> {
+  let ParserMatchClass = Imm0_15Operand;
+}
+
 // timm32_1_7 predicate - True if the 32-bit immediate is in the range [1,7]
 def timm32_1_7 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) > 0 && ((uint32_t)Imm) < 8;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 5cfcc01afd20f3..89b295b5a1d6ec 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10358,15 +10358,14 @@ class sve2p1_extq<string mnemonic>
 multiclass sve2p1_extq<string mnemonic, SDPatternOperator Op> {
   def NAME : sve2p1_extq<mnemonic>;
   def : SVE_3_Op_Imm_Pat<nxv16i8, Op, nxv16i8, nxv16i8, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_Imm_Pat<nxv8i16, Op, nxv8i16, nxv8i16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_Imm_Pat<nxv4i32, Op, nxv4i32, nxv4i32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_Imm_Pat<nxv2i64, Op, nxv2i64, nxv2i64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-
-  def : SVE_3_Op_Imm_Pat<nxv8f16, Op, nxv8f16, nxv8f16, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_Imm_Pat<nxv4f32, Op, nxv4f32, nxv4f32, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_Imm_Pat<nxv2f64, Op, nxv2f64, nxv2f64, i32, timm32_0_15, !cast<Instruction>(NAME)>;
-  def : SVE_3_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, nxv8bf16, i32, timm32_0_15, !cast<Instruction>(NAME
-)>;
+  def : SVE_3_Op_Imm_Pat<nxv8i16, Op, nxv8i16, nxv8i16, i32, extq_timm32_0_7m2, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv4i32, Op, nxv4i32, nxv4i32, i32, extq_timm32_0_3m4, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv2i64, Op, nxv2i64, nxv2i64, i32, extq_timm32_0_1m8, !cast<Instruction>(NAME)>;
+
+  def : SVE_3_Op_Imm_Pat<nxv8f16, Op, nxv8f16, nxv8f16, i32, extq_timm32_0_7m2, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv4f32, Op, nxv4f32, nxv4f32, i32, extq_timm32_0_3m4, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv2f64, Op, nxv2f64, nxv2f64, i32, extq_timm32_0_1m8, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Imm_Pat<nxv8bf16, Op, nxv8bf16, nxv8bf16, i32, extq_timm32_0_7m2, !cast<Instruction>(NAME)>;
 }
 
 // SVE move predicate from vector
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
index a49aa7cfcf8a2d..bb4c67fca5dc8b 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-extq.ll
@@ -4,16 +4,16 @@
 define <vscale x 16 x i8> @test_extq_i8 (<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: test_extq_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #0
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #15
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 0)
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.extq.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm, i32 15)
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 8 x i16> @test_extq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: test_extq_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #1
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #2
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.extq.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm, i32 1)
   ret <vscale x 8 x i16> %res
@@ -22,7 +22,7 @@ define <vscale x 8 x i16> @test_extq_i16 (<vscale x 8 x i16> %zn, <vscale x 8 x
 define <vscale x 4 x i32> @test_extq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: test_extq_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #2
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.extq.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm, i32 2)
   ret <vscale x 4 x i32> %res
@@ -31,45 +31,45 @@ define <vscale x 4 x i32> @test_extq_i32 (<vscale x 4 x i32> %zn, <vscale x 4 x
 define <vscale x 2 x i64> @test_extq_i64 (<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: test_extq_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #3
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm, i32 3)
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.extq.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm, i32 1)
   ret <vscale x 2 x i64> %res
 }
 
 define <vscale x 8 x half> @test_extq_f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: test_extq_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #4
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #14
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 4)
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.extq.nxv8f16(<vscale x 8 x half> %zn, <vscale x 8 x half> %zm, i32 7)
   ret <vscale x 8 x half> %res
 }
 
 define <vscale x 4 x float> @test_extq_f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: test_extq_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #5
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #4
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm, i32 5)
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.extq.nxv4f32(<vscale x 4 x float> %zn, <vscale x 4 x float> %zm, i32 1)
   ret <vscale x 4 x float> %res
 }
 
 define <vscale x 2 x double> @test_extq_f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: test_extq_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #6
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm, i32 6)
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.extq.nxv2f64(<vscale x 2 x double> %zn, <vscale x 2 x double> %zm, i32 1)
   ret <vscale x 2 x double> %res
 }
 
 define <vscale x 8 x bfloat> @test_extq_bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: test_extq_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #15
+; CHECK-NEXT:    extq z0.b, z0.b, z1.b, #6
 ; CHECK-NEXT:    ret
-  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 15)
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.extq.nxv8bf16(<vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 3)
   ret <vscale x 8 x bfloat> %res
 }
 



More information about the llvm-commits mailing list