[clang] b14a6f0 - [ARM][MVE] vcreateq lane ordering for big endian

Tomas Matheson via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 30 05:49:44 PDT 2021


Author: Tomas Matheson
Date: 2021-04-30T13:48:05+01:00
New Revision: b14a6f06cc8763830a25023edf5b9ccee18e426a

URL: https://github.com/llvm/llvm-project/commit/b14a6f06cc8763830a25023edf5b9ccee18e426a
DIFF: https://github.com/llvm/llvm-project/commit/b14a6f06cc8763830a25023edf5b9ccee18e426a.diff

LOG: [ARM][MVE] vcreateq lane ordering for big endian

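Using bitcast caused the lanes to be swapped for vcreateq on big-endian
targets. Fix this by using vreinterpret instead. The generated code for
little-endian targets is unchanged. Extends the existing IR lit test
(admin.c) with big-endian RUN lines and CHECK-BE expectations.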

Differential Revision: https://reviews.llvm.org/D101606

Added: 
    

Modified: 
    clang/include/clang/Basic/arm_mve.td
    clang/test/CodeGen/arm-mve-intrinsics/admin.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 8106f9a5a9def..55c2fbe7f0217 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -1543,7 +1543,7 @@ foreach desttype = T.All in {
 let params = T.All in {
   let pnt = PNT_None in {
     def vcreateq: Intrinsic<Vector, (args u64:$a, u64:$b),
-        (bitcast (ielt_const (ielt_const (undef VecOf<u64>), $a, 0),
+        (vreinterpret (ielt_const (ielt_const (undef VecOf<u64>), $a, 0),
                              $b, 1), Vector)>;
     def vuninitializedq: Intrinsic<Vector, (args), (undef Vector)>;
   }

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/admin.c b/clang/test/CodeGen/arm-mve-intrinsics/admin.c
index 137231557011a..6c81cda00bac8 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/admin.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/admin.c
@@ -1,51 +1,82 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+// RUN: %clang_cc1 -triple thumbebv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+// RUN: %clang_cc1 -triple thumbebv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
 
 #include <arm_mve.h>
 
-// CHECK-LABEL: @test_vcreateq_f16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x half>
-// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_f16(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x half>
+// CHECK-LE-NEXT:    ret <8 x half> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_f16(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vreinterpretq.v8f16.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vcreateq_f16(uint64_t a, uint64_t b)
 {
     return vcreateq_f16(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_f32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x float>
-// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_f32(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x float>
+// CHECK-LE-NEXT:    ret <4 x float> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_f32(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vreinterpretq.v4f32.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vcreateq_f32(uint64_t a, uint64_t b)
 {
     return vcreateq_f32(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_s16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_s16(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16>
+// CHECK-LE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_s16(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <8 x i16> [[TMP2]]
 //
 int16x8_t test_vcreateq_s16(uint64_t a, uint64_t b)
 {
     return vcreateq_s16(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_s32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_s32(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-LE-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_s32(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vcreateq_s32(uint64_t a, uint64_t b)
 {
@@ -63,36 +94,57 @@ int64x2_t test_vcreateq_s64(uint64_t a, uint64_t b)
     return vcreateq_s64(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_s8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_s8(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-LE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_s8(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vreinterpretq.v16i8.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <16 x i8> [[TMP2]]
 //
 int8x16_t test_vcreateq_s8(uint64_t a, uint64_t b)
 {
     return vcreateq_s8(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_u16(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_u16(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16>
+// CHECK-LE-NEXT:    ret <8 x i16> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_u16(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <8 x i16> [[TMP2]]
 //
 uint16x8_t test_vcreateq_u16(uint64_t a, uint64_t b)
 {
     return vcreateq_u16(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_u32(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_u32(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-LE-NEXT:    ret <4 x i32> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_u32(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <4 x i32> [[TMP2]]
 //
 uint32x4_t test_vcreateq_u32(uint64_t a, uint64_t b)
 {
@@ -110,12 +162,19 @@ uint64x2_t test_vcreateq_u64(uint64_t a, uint64_t b)
     return vcreateq_u64(a, b);
 }
 
-// CHECK-LABEL: @test_vcreateq_u8(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
-// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+// CHECK-LE-LABEL: @test_vcreateq_u8(
+// CHECK-LE-NEXT:  entry:
+// CHECK-LE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-LE-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-LE-NEXT:    ret <16 x i8> [[TMP2]]
+//
+// CHECK-BE-LABEL: @test_vcreateq_u8(
+// CHECK-BE-NEXT:  entry:
+// CHECK-BE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-BE-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vreinterpretq.v16i8.v2i64(<2 x i64> [[TMP1]])
+// CHECK-BE-NEXT:    ret <16 x i8> [[TMP2]]
 //
 uint8x16_t test_vcreateq_u8(uint64_t a, uint64_t b)
 {


        


More information about the cfe-commits mailing list