[clang] 9e37892 - [ARM,MVE] Add intrinsics for vector get/set lane.

Simon Tatham via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 15 01:55:05 PST 2019


Author: Simon Tatham
Date: 2019-11-15T09:53:58Z
New Revision: 9e37892773c0954a15f84b011223da1e707ab3bf

URL: https://github.com/llvm/llvm-project/commit/9e37892773c0954a15f84b011223da1e707ab3bf
DIFF: https://github.com/llvm/llvm-project/commit/9e37892773c0954a15f84b011223da1e707ab3bf.diff

LOG: [ARM,MVE] Add intrinsics for vector get/set lane.

This adds the `vgetq_lane` and `vsetq_lane` families, to copy between
a scalar and a specified lane of a vector.

One of the new `vgetq_lane` intrinsics returns a `float16_t`, which
causes a compile error if `%clang_cc1` doesn't get the option
`-fallow-half-arguments-and-returns`. The driver passes that option to
cc1 already, but I've had to edit all the explicit cc1 command lines
in the existing MVE intrinsics tests.

A couple of fixes are included for the code I wrote up front in
MveEmitter to support lane-index immediates (which nothing had tested
until now): the immediate's type was wrong (`uint32_t` instead of
`int`), and the upper bound of the valid range was off by one
(`128 / size` instead of `128 / size - 1`).

I've also added a way to bypass the default promotion to `i32` that
the MveEmitter code generation performs, by wrapping an argument type
in `unpromoted<...>`: it's sensible to promote short scalars like `i16`
to `i32` if they're going to be passed to custom IR intrinsics
representing a machine instruction operating on GPRs, but not if
they're going to be passed to standard IR operations like
`insertelement`, which expect the exact type.

Reviewers: ostannard, MarkMurrayARM, dmgreen

Reviewed By: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70188

Added: 
    clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c

Modified: 
    clang/include/clang/Basic/arm_mve.td
    clang/include/clang/Basic/arm_mve_defs.td
    clang/test/CodeGen/arm-mve-intrinsics/load-store.c
    clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
    clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
    clang/test/CodeGen/arm-mve-intrinsics/vadc.c
    clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
    clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
    clang/test/CodeGen/arm-mve-intrinsics/vld24.c
    clang/test/CodeGen/arm-mve-intrinsics/vldr.c
    clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
    clang/test/Sema/arm-mve-immediates.c
    clang/utils/TableGen/MveEmitter.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index c8501813d264..b72a8303ba3e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -413,4 +413,9 @@ let params = T.All in {
     def vuninitializedq_polymorphic: Intrinsic<
         Vector, (args Vector), (undef Vector)>;
   }
+
+  def vgetq_lane: Intrinsic<Scalar, (args Vector:$v, imm_lane:$lane),
+                            (xelt_var $v, $lane)>;
+  def vsetq_lane: Intrinsic<Vector, (args unpromoted<Scalar>:$e, Vector:$v, imm_lane:$lane),
+                            (ielt_var $v, $e, $lane)>;
 }

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 911c2c129db9..4a1d6ed92664 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -77,6 +77,8 @@ def xval: IRBuilder<"CreateExtractValue"> {
 def ielt_const: IRBuilder<"CreateInsertElement"> {
   let special_params = [IRBuilderIntParam<2, "uint64_t">];
 }
+def ielt_var: IRBuilder<"CreateInsertElement">;
+def xelt_var: IRBuilder<"CreateExtractElement">;
 def trunc: IRBuilder<"CreateTrunc">;
 def bitcast: IRBuilder<"CreateBitCast">;
 def extend: CGHelperFn<"SignOrZeroExtend"> {
@@ -172,6 +174,10 @@ def CTO_CopyKind: ComplexTypeOp;
 // of _s32 / _f16 / _u8 suffix.
 def Void : Type;
 
+// A wrapper you can put on an intrinsic's argument type to prevent it from
+// being automatically promoted to i32 from a smaller integer type.
+class unpromoted<Type t> : Type { Type underlying_type = t; }
+
 // Primitive types: base class, and an instance for the set of scalar integer
 // and floating types that MVE uses.
 class PrimitiveType<string kind_, int size_>: Type {
@@ -285,7 +291,7 @@ def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
-def imm_lane : Immediate<u32, IB_LaneIndex>;
+def imm_lane : Immediate<sint, IB_LaneIndex>;
 
 // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
 // intrinsics)

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c
new file mode 100644
index 000000000000..6eaf0f8a71f5
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c
@@ -0,0 +1,291 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vgetq_lane_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x half> [[A:%.*]], i32 2
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK-NEXT:    [[TMP_0_INSERT_EXT:%.*]] = zext i16 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP_0_INSERT_EXT]] to float
+// CHECK-NEXT:    ret float [[TMP2]]
+//
+float16_t test_vgetq_lane_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 2);
+#else /* POLYMORPHIC */
+    return vgetq_lane_f16(a, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float32_t test_vgetq_lane_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 3);
+#else /* POLYMORPHIC */
+    return vgetq_lane_f32(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[A:%.*]], i32 4
+// CHECK-NEXT:    ret i16 [[TMP0]]
+//
+int16_t test_vgetq_lane_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 4);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s16(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_vgetq_lane_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 0);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s32(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 0
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+int64_t test_vgetq_lane_s64(int64x2_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 0);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s64(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <16 x i8> [[A:%.*]], i32 10
+// CHECK-NEXT:    ret i8 [[TMP0]]
+//
+int8_t test_vgetq_lane_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 10);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s8(a, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret i16 [[TMP0]]
+//
+uint16_t test_vgetq_lane_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 3);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u16(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_vgetq_lane_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 3);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u32(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 1
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t test_vgetq_lane_u64(uint64x2_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 1);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u64(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <16 x i8> [[A:%.*]], i32 1
+// CHECK-NEXT:    ret i8 [[TMP0]]
+//
+uint8_t test_vgetq_lane_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 1);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u8(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
+// CHECK-NEXT:    [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x half> [[B:%.*]], half [[TMP1]], i32 4
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vsetq_lane_f16(float16_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 4);
+#else /* POLYMORPHIC */
+    return vsetq_lane_f16(a, b, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A:%.*]], i32 2
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsetq_lane_f32(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i8> [[B:%.*]], i8 [[A:%.*]], i32 12
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 12);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s8(a, b, 12);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i16> [[B:%.*]], i16 [[A:%.*]], i32 6
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 6);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s16(a, b, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[A:%.*]], i32 2
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s32(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> [[B:%.*]], i64 [[A:%.*]], i32 0
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 0);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s64(a, b, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i8> [[B:%.*]], i8 [[A:%.*]], i32 2
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u8(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i16> [[B:%.*]], i16 [[A:%.*]], i32 7
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 7);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u16(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 0);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u32(a, b, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> [[B:%.*]], i64 [[A:%.*]], i32 1
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 1);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u64(a, b, 1);
+#endif /* POLYMORPHIC */
+}

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c
index 5cbf6a3128c0..e7a92bc26128 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
index 0eead7a973f0..12e8f1195743 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
index 830f62442c3e..8bf2111a9e63 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
index 58a47fc42bcb..94fa1d1b00f2 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
index 1f18d5b57880..0f4402c9ea53 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index ed3ecd3ee62e..ccee9fd76c7c 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
index eaf96da36dfa..984d5989217e 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vldr.c b/clang/test/CodeGen/arm-mve-intrinsics/vldr.c
index a1f02dec729f..e394ed13ad74 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vldr.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vldr.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
index 10eedb934ba3..1cf4d0ee198e 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c
index cdf68b8a949f..45b2357a600a 100644
--- a/clang/test/Sema/arm-mve-immediates.c
+++ b/clang/test/Sema/arm-mve-immediates.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -verify -fsyntax-only %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -verify -fsyntax-only %s
 
 #include <arm_mve.h>
 
@@ -54,3 +54,47 @@ void test_load_offsets(uint32x4_t addr32, uint64x2_t addr64)
   vstrwq_scatter_base(addr32, 2, addr32); // expected-error {{argument should be a multiple of 4}}
   vstrwq_scatter_base(addr32, 1, addr32); // expected-error {{argument should be a multiple of 4}}
 }
+
+void test_lane_indices(uint8x16_t v16, uint16x8_t v8,
+                       uint32x4_t v4, uint64x2_t v2)
+{
+  vgetq_lane_u8(v16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  vgetq_lane_u8(v16, 0);
+  vgetq_lane_u8(v16, 15);
+  vgetq_lane_u8(v16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+
+  vgetq_lane_u16(v8, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  vgetq_lane_u16(v8, 0);
+  vgetq_lane_u16(v8, 7);
+  vgetq_lane_u16(v8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+  vgetq_lane_u32(v4, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  vgetq_lane_u32(v4, 0);
+  vgetq_lane_u32(v4, 3);
+  vgetq_lane_u32(v4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  vgetq_lane_u64(v2, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  vgetq_lane_u64(v2, 0);
+  vgetq_lane_u64(v2, 1);
+  vgetq_lane_u64(v2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+
+  vsetq_lane_u8(23, v16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  vsetq_lane_u8(23, v16, 0);
+  vsetq_lane_u8(23, v16, 15);
+  vsetq_lane_u8(23, v16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+
+  vsetq_lane_u16(23, v8, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  vsetq_lane_u16(23, v8, 0);
+  vsetq_lane_u16(23, v8, 7);
+  vsetq_lane_u16(23, v8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+  vsetq_lane_u32(23, v4, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  vsetq_lane_u32(23, v4, 0);
+  vsetq_lane_u32(23, v4, 3);
+  vsetq_lane_u32(23, v4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  vsetq_lane_u64(23, v2, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  vsetq_lane_u64(23, v2, 0);
+  vsetq_lane_u64(23, v2, 1);
+  vsetq_lane_u64(23, v2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+}

diff  --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index 4258dd0a1f41..1ca3b5a3f224 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -980,7 +980,8 @@ class MveEmitter {
                             const Type *Param);
   Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum,
                                const Result::Scope &Scope, const Type *Param);
-  Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType);
+  Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType,
+                            bool Promote);
 
   // Constructor and top-level functions.
 
@@ -1003,8 +1004,13 @@ const Type *MveEmitter::getType(Init *I, const Type *Param) {
 }
 
 const Type *MveEmitter::getType(Record *R, const Type *Param) {
+  // Pass to a subfield of any wrapper records. We don't expect more than one
+  // of these: immediate operands are used as plain numbers rather than as
+  // llvm::Value, so it's meaningless to promote their type anyway.
   if (R->isSubClassOf("Immediate"))
-    R = R->getValueAsDef("type"); // pass to subfield
+    R = R->getValueAsDef("type");
+  else if (R->isSubClassOf("unpromoted"))
+    R = R->getValueAsDef("underlying_type");
 
   if (R->getName() == "Void")
     return getVoidType();
@@ -1197,12 +1203,13 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
   PrintFatalError("bad dag argument type for code generation");
 }
 
-Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType) {
+Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
+                                      bool Promote) {
   Result::Ptr V =
       std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType));
 
   if (const auto *ST = dyn_cast<ScalarType>(ArgType)) {
-    if (ST->isInteger() && ST->sizeInBits() < 32)
+    if (Promote && ST->isInteger() && ST->sizeInBits() < 32)
       V = std::make_shared<IntCastResult>(getScalarType("u32"), V);
   } else if (const auto *PT = dyn_cast<PredicateType>(ArgType)) {
     V = std::make_shared<IntCastResult>(getScalarType("u32"), V);
@@ -1260,6 +1267,11 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
   for (unsigned i = 0, e = ArgsDag->getNumArgs(); i < e; ++i) {
     Init *TypeInit = ArgsDag->getArg(i);
 
+    bool Promote = true;
+    if (auto TypeDI = dyn_cast<DefInit>(TypeInit))
+      if (TypeDI->getDef()->isSubClassOf("unpromoted"))
+        Promote = false;
+
     // Work out the type of the argument, for use in the function prototype in
     // the header file.
     const Type *ArgType = ME.getType(TypeInit, Param);
@@ -1269,7 +1281,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
     // into the variable-name scope that the code gen will refer to.
     StringRef ArgName = ArgsDag->getArgNameStr(i);
     if (!ArgName.empty())
-      Scope[ArgName] = ME.getCodeForArg(i, ArgType);
+      Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote);
 
     // If the argument is a subclass of Immediate, record the details about
     // what values it can take, for Sema checking.
@@ -1288,7 +1300,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
         } else if (Bounds->getName() == "IB_LaneIndex") {
           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
           IA.i1 = 0;
-          IA.i2 = 128 / Param->sizeInBits();
+          IA.i2 = 128 / Param->sizeInBits() - 1;
         } else if (Bounds->getName() == "IB_EltBit") {
           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
           IA.i1 = Bounds->getValueAsInt("base");


        


More information about the cfe-commits mailing list