[clang] d22e661 - [ARM, CDE] Implement CDE S and D-register intrinsics
Mikhail Maltsev via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 20 07:02:42 PDT 2020
Author: Mikhail Maltsev
Date: 2020-03-20T14:01:53Z
New Revision: d22e66171251cd3dd07507912189aa814a419678
URL: https://github.com/llvm/llvm-project/commit/d22e66171251cd3dd07507912189aa814a419678
DIFF: https://github.com/llvm/llvm-project/commit/d22e66171251cd3dd07507912189aa814a419678.diff
LOG: [ARM,CDE] Implement CDE S and D-register intrinsics
Summary:
This patch implements the following ACLE intrinsics:
uint32_t __arm_vcx1_u32(int coproc, uint32_t imm);
uint32_t __arm_vcx1a_u32(int coproc, uint32_t acc, uint32_t imm);
uint32_t __arm_vcx2_u32(int coproc, uint32_t n, uint32_t imm);
uint32_t __arm_vcx2a_u32(int coproc, uint32_t acc, uint32_t n, uint32_t imm);
uint32_t __arm_vcx3_u32(int coproc, uint32_t n, uint32_t m, uint32_t imm);
uint32_t __arm_vcx3a_u32(int coproc, uint32_t acc, uint32_t n, uint32_t m, uint32_t imm);
uint64_t __arm_vcx1d_u64(int coproc, uint32_t imm);
uint64_t __arm_vcx1da_u64(int coproc, uint64_t acc, uint32_t imm);
uint64_t __arm_vcx2d_u64(int coproc, uint64_t m, uint32_t imm);
uint64_t __arm_vcx2da_u64(int coproc, uint64_t acc, uint64_t m, uint32_t imm);
uint64_t __arm_vcx3d_u64(int coproc, uint64_t n, uint64_t m, uint32_t imm);
uint64_t __arm_vcx3da_u64(int coproc, uint64_t acc, uint64_t n, uint64_t m, uint32_t imm);
Since the semantics of CDE instructions are opaque to the compiler, the
ACLE intrinsics require dedicated LLVM IR intrinsics. The 64-bit and
32-bit variants share the same IR intrinsic, which is overloaded on the
floating-point type (f32 or f64).
Reviewers: simon_tatham, MarkMurrayARM, ostannard, dmgreen
Reviewed By: MarkMurrayARM
Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D76298
Added:
clang/test/CodeGen/arm-cde-vfp.c
llvm/test/CodeGen/Thumb2/cde-vfp.ll
Modified:
clang/include/clang/Basic/arm_cde.td
clang/test/Sema/arm-cde-immediates.c
clang/utils/TableGen/MveEmitter.cpp
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrCDE.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_cde.td b/clang/include/clang/Basic/arm_cde.td
index 139007d387a0..9cd0af8987c9 100644
--- a/clang/include/clang/Basic/arm_cde.td
+++ b/clang/include/clang/Basic/arm_cde.td
@@ -13,6 +13,15 @@
include "arm_mve_defs.td"
+// f64 is not defined in arm_mve_defs.td because MVE instructions only work with
+// f16 and f32
+def f64: PrimitiveType<"f", 64>;
+
+// Float<t> expects t to be a scalar type, and expands to the floating-point
+// type of the same width.
+class Float<Type t>: ComplexType<(CTO_CopyKind t, f32)>;
+def FScalar: Float<Scalar>;
+
// ACLE CDE intrinsic
class CDEIntrinsic<Type ret, dag args, dag codegen>
: Intrinsic<ret, args, codegen> {
@@ -70,3 +79,31 @@ multiclass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
defm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
defm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
defm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;
+
+// VCX* instructions operating on VFP registers
+multiclass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64, dag cgArgs> {
+ defvar cp = (args imm_coproc:$cp);
+ let pnt = PNT_None, params = [u32] in {
+ def "" : CDEIntrinsic<u32, !con(cp, argsReg32, argsImm),
+ (bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
+ Scalar)>;
+ def a : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsReg32, argsImm),
+ (bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp,
+ (bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
+ }
+ let pnt = PNT_None, params = [u64] in {
+ def d : CDEIntrinsic<u64, !con(cp, argsReg64, argsImm),
+ (bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
+ Scalar)>;
+ def da : CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsReg64, argsImm),
+ (bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp,
+ (bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
+ }
+}
+
+defm vcx1: CDE_VCXFP_m<(args imm_11b:$imm), (args), (args), (?)>;
+defm vcx2: CDE_VCXFP_m<(args imm_6b:$imm), (args u32:$n), (args u64:$n),
+ (? (bitcast $n, FScalar))>;
+defm vcx3: CDE_VCXFP_m<(args imm_3b:$imm),
+ (args u32:$n, u32:$m), (args u64:$n, u64:$m),
+ (? (bitcast $n, FScalar), (bitcast $m, FScalar))>;
diff --git a/clang/test/CodeGen/arm-cde-vfp.c b/clang/test/CodeGen/arm-cde-vfp.c
new file mode 100644
index 000000000000..fffcb716359d
--- /dev/null
+++ b/clang/test/CodeGen/arm-cde-vfp.c
@@ -0,0 +1,145 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi \
+// RUN: -target-feature +cdecp0 -target-feature +cdecp1 \
+// RUN: -mfloat-abi hard -O0 -disable-O0-optnone \
+// RUN: -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_cde.h>
+
+// CHECK-LABEL: @test_vcx1_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.arm.cde.vcx1.f32(i32 0, i32 11)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+uint32_t test_vcx1_u32(void) {
+ return __arm_vcx1_u32(0, 11);
+}
+
+// CHECK-LABEL: @test_vcx1a_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.arm.cde.vcx1a.f32(i32 1, float [[TMP0]], i32 12)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
+uint32_t test_vcx1a_u32(uint32_t acc) {
+ return __arm_vcx1a_u32(1, acc, 12);
+}
+
+// CHECK-LABEL: @test_vcx2_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[N:%.*]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.arm.cde.vcx2.f32(i32 0, float [[TMP0]], i32 21)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[TMP1]] to i32
+// CHECK-NEXT: ret i32 [[TMP2]]
+//
+uint32_t test_vcx2_u32(uint32_t n) {
+ return __arm_vcx2_u32(0, n, 21);
+}
+
+// CHECK-LABEL: @test_vcx2a_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[N:%.*]] to float
+// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.cde.vcx2a.f32(i32 0, float [[TMP0]], float [[TMP1]], i32 22)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
+// CHECK-NEXT: ret i32 [[TMP3]]
+//
+uint32_t test_vcx2a_u32(uint32_t acc, uint32_t n) {
+ return __arm_vcx2a_u32(0, acc, n, 22);
+}
+
+// CHECK-LABEL: @test_vcx3_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[N:%.*]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[M:%.*]] to float
+// CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.arm.cde.vcx3.f32(i32 1, float [[TMP0]], float [[TMP1]], i32 3)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP2]] to i32
+// CHECK-NEXT: ret i32 [[TMP3]]
+//
+uint32_t test_vcx3_u32(uint32_t n, uint32_t m) {
+ return __arm_vcx3_u32(1, n, m, 3);
+}
+
+// CHECK-LABEL: @test_vcx3a_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[ACC:%.*]] to float
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[N:%.*]] to float
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[M:%.*]] to float
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.arm.cde.vcx3a.f32(i32 0, float [[TMP0]], float [[TMP1]], float [[TMP2]], i32 5)
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
+// CHECK-NEXT: ret i32 [[TMP4]]
+//
+uint32_t test_vcx3a_u32(uint32_t acc, uint32_t n, uint32_t m) {
+ return __arm_vcx3a_u32(0, acc, n, m, 5);
+}
+
+// CHECK-LABEL: @test_vcx1d_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.arm.cde.vcx1.f64(i32 0, i32 11)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[TMP0]] to i64
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+uint64_t test_vcx1d_u64(void) {
+ return __arm_vcx1d_u64(0, 11);
+}
+
+// CHECK-LABEL: @test_vcx1da_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
+// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.arm.cde.vcx1a.f64(i32 1, double [[TMP0]], i32 12)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64
+// CHECK-NEXT: ret i64 [[TMP2]]
+//
+uint64_t test_vcx1da_u64(uint64_t acc) {
+ return __arm_vcx1da_u64(1, acc, 12);
+}
+
+// CHECK-LABEL: @test_vcx2d_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[N:%.*]] to double
+// CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.arm.cde.vcx2.f64(i32 0, double [[TMP0]], i32 21)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64
+// CHECK-NEXT: ret i64 [[TMP2]]
+//
+uint64_t test_vcx2d_u64(uint64_t n) {
+ return __arm_vcx2d_u64(0, n, 21);
+}
+
+// CHECK-LABEL: @test_vcx2da_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[N:%.*]] to double
+// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.arm.cde.vcx2a.f64(i32 0, double [[TMP0]], double [[TMP1]], i32 22)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+// CHECK-NEXT: ret i64 [[TMP3]]
+//
+uint64_t test_vcx2da_u64(uint64_t acc, uint64_t n) {
+ return __arm_vcx2da_u64(0, acc, n, 22);
+}
+
+// CHECK-LABEL: @test_vcx3d_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[N:%.*]] to double
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[M:%.*]] to double
+// CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.arm.cde.vcx3.f64(i32 1, double [[TMP0]], double [[TMP1]], i32 3)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[TMP2]] to i64
+// CHECK-NEXT: ret i64 [[TMP3]]
+//
+uint64_t test_vcx3d_u64(uint64_t n, uint64_t m) {
+ return __arm_vcx3d_u64(1, n, m, 3);
+}
+
+// CHECK-LABEL: @test_vcx3da_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[ACC:%.*]] to double
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[N:%.*]] to double
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[M:%.*]] to double
+// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.arm.cde.vcx3a.f64(i32 0, double [[TMP0]], double [[TMP1]], double [[TMP2]], i32 5)
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
+// CHECK-NEXT: ret i64 [[TMP4]]
+//
+uint64_t test_vcx3da_u64(uint64_t acc, uint64_t n, uint64_t m) {
+ return __arm_vcx3da_u64(0, acc, n, m, 5);
+}
diff --git a/clang/test/Sema/arm-cde-immediates.c b/clang/test/Sema/arm-cde-immediates.c
index d521e099c7d1..19159f9be4ea 100644
--- a/clang/test/Sema/arm-cde-immediates.c
+++ b/clang/test/Sema/arm-cde-immediates.c
@@ -63,3 +63,43 @@ void test_cx(uint32_t a, uint64_t da, uint32_t n, uint32_t m) {
__arm_cx3da(0, da, n, m, a); // expected-error {{argument to '__arm_cx3da' must be a constant integer}}
__arm_cx3da(0, da, n, m, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
}
+
+void test_vcxfp_u32(uint32_t a, uint32_t n, uint32_t m) {
+ (void)__arm_vcx1_u32(0, 0);
+ __arm_vcx1_u32(0, a); // expected-error {{argument to '__arm_vcx1_u32' must be a constant integer}}
+ __arm_vcx1_u32(0, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
+ __arm_vcx1a_u32(0, a, a); // expected-error {{argument to '__arm_vcx1a_u32' must be a constant integer}}
+ __arm_vcx1a_u32(0, a, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
+
+ (void)__arm_vcx2_u32(0, n, 0);
+ __arm_vcx2_u32(0, n, a); // expected-error {{argument to '__arm_vcx2_u32' must be a constant integer}}
+ __arm_vcx2_u32(0, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+ __arm_vcx2a_u32(0, a, n, a); // expected-error {{argument to '__arm_vcx2a_u32' must be a constant integer}}
+ __arm_vcx2a_u32(0, a, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+
+ (void)__arm_vcx3_u32(0, n, m, 0);
+ __arm_vcx3_u32(0, n, m, a); // expected-error {{argument to '__arm_vcx3_u32' must be a constant integer}}
+ __arm_vcx3_u32(0, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ __arm_vcx3a_u32(0, a, n, m, a); // expected-error {{argument to '__arm_vcx3a_u32' must be a constant integer}}
+ __arm_vcx3a_u32(0, a, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
+
+void test_vcxfp_u64(uint64_t a, uint64_t n, uint64_t m) {
+ (void)__arm_vcx1d_u64(0, 0);
+ __arm_vcx1d_u64(0, a); // expected-error {{argument to '__arm_vcx1d_u64' must be a constant integer}}
+ __arm_vcx1d_u64(0, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
+ __arm_vcx1da_u64(0, a, a); // expected-error {{argument to '__arm_vcx1da_u64' must be a constant integer}}
+ __arm_vcx1da_u64(0, a, 2048); // expected-error {{argument value 2048 is outside the valid range [0, 2047]}}
+
+ (void)__arm_vcx2d_u64(0, n, 0);
+ __arm_vcx2d_u64(0, n, a); // expected-error {{argument to '__arm_vcx2d_u64' must be a constant integer}}
+ __arm_vcx2d_u64(0, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+ __arm_vcx2da_u64(0, a, n, a); // expected-error {{argument to '__arm_vcx2da_u64' must be a constant integer}}
+ __arm_vcx2da_u64(0, a, n, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+
+ (void)__arm_vcx3d_u64(0, n, m, 0);
+ __arm_vcx3d_u64(0, n, m, a); // expected-error {{argument to '__arm_vcx3d_u64' must be a constant integer}}
+ __arm_vcx3d_u64(0, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+ __arm_vcx3da_u64(0, a, n, m, a); // expected-error {{argument to '__arm_vcx3da_u64' must be a constant integer}}
+ __arm_vcx3da_u64(0, a, n, m, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
index f75f5000f0f6..076b491ff94f 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -1995,6 +1995,9 @@ void CdeEmitter::EmitHeader(raw_ostream &OS) {
const ScalarType *ST = kv.second.get();
if (ST->hasNonstandardName())
continue;
+ // We don't have float64x2_t
+ if (ST->kind() == ScalarTypeKind::Float && ST->sizeInBits() == 64)
+ continue;
raw_ostream &OS = parts[ST->requiresFloat() ? MVEFloat : MVE];
const VectorType *VT = getVectorType(ST);
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index ba0cf909e5de..de2e6a39abeb 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1301,4 +1301,20 @@ defm int_arm_cde_cx1: CDEGPRIntrinsics<[]>;
defm int_arm_cde_cx2: CDEGPRIntrinsics<[llvm_i32_ty]>;
defm int_arm_cde_cx3: CDEGPRIntrinsics<[llvm_i32_ty, llvm_i32_ty]>;
+multiclass CDEVCXIntrinsics<list<LLVMType> args> {
+ def "" : Intrinsic<
+ [llvm_anyfloat_ty],
+ !listconcat([llvm_i32_ty /* coproc */], args, [llvm_i32_ty /* imm */]),
+ [IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 1)>]>;
+ def a : Intrinsic<
+ [llvm_anyfloat_ty],
+ !listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
+ args, [llvm_i32_ty /* imm */]),
+ [IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 2)>]>;
+}
+
+defm int_arm_cde_vcx1 : CDEVCXIntrinsics<[]>;
+defm int_arm_cde_vcx2 : CDEVCXIntrinsics<[LLVMMatchType<0>]>;
+defm int_arm_cde_vcx3 : CDEVCXIntrinsics<[LLVMMatchType<0>, LLVMMatchType<0>]>;
+
} // end TargetPrefix
diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td
index 648911acd26c..9497e1733689 100644
--- a/llvm/lib/Target/ARM/ARMInstrCDE.td
+++ b/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -542,3 +542,42 @@ def CDE_VCX3_fpdp : CDE_VCX3_FP_Instr_D<"vcx3", cde_vcx_params_d_noacc>;
def CDE_VCX3A_fpdp : CDE_VCX3_FP_Instr_D<"vcx3a", cde_vcx_params_d_acc>;
def CDE_VCX3_vec : CDE_VCX3_Vec_Instr<"vcx3", cde_vcx_params_q_noacc>;
def CDE_VCX3A_vec : CDE_VCX3_Vec_Instr<"vcx3a", cde_vcx_params_q_acc>;
+
+
+let Predicates = [HasCDE, HasFPRegs] in {
+ def : Pat<(f32 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
+ (f32 (CDE_VCX1_fpsp p_imm:$coproc, imm_11b:$imm))>;
+ def : Pat<(f32 (int_arm_cde_vcx1a timm:$coproc, (f32 SPR:$acc), timm:$imm)),
+ (f32 (CDE_VCX1A_fpsp p_imm:$coproc, SPR:$acc, imm_11b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx1 timm:$coproc, timm:$imm)),
+ (f64 (CDE_VCX1_fpdp p_imm:$coproc, imm_11b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx1a timm:$coproc, (f64 DPR:$acc), timm:$imm)),
+ (f64 (CDE_VCX1A_fpdp p_imm:$coproc, DPR:$acc, imm_11b:$imm))>;
+
+ def : Pat<(f32 (int_arm_cde_vcx2 timm:$coproc, (f32 SPR:$n), timm:$imm)),
+ (f32 (CDE_VCX2_fpsp p_imm:$coproc, SPR:$n, imm_6b:$imm))>;
+ def : Pat<(f32 (int_arm_cde_vcx2a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
+ timm:$imm)),
+ (f32 (CDE_VCX2A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, imm_6b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx2 timm:$coproc, (f64 DPR:$n), timm:$imm)),
+ (f64 (CDE_VCX2_fpdp p_imm:$coproc, DPR:$n, imm_6b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx2a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
+ timm:$imm)),
+ (f64 (CDE_VCX2A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, imm_6b:$imm))>;
+
+ def : Pat<(f32 (int_arm_cde_vcx3 timm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
+ timm:$imm)),
+ (f32 (CDE_VCX3_fpsp p_imm:$coproc, (f32 SPR:$n), (f32 SPR:$m),
+ imm_3b:$imm))>;
+ def : Pat<(f32 (int_arm_cde_vcx3a timm:$coproc, (f32 SPR:$acc), (f32 SPR:$n),
+ (f32 SPR:$m), timm:$imm)),
+ (f32 (CDE_VCX3A_fpsp p_imm:$coproc, SPR:$acc, SPR:$n, SPR:$m,
+ imm_3b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx3 timm:$coproc, (f64 DPR:$n), (f64 DPR:$m),
+ timm:$imm)),
+ (f64 (CDE_VCX3_fpdp p_imm:$coproc, DPR:$n, DPR:$m, imm_3b:$imm))>;
+ def : Pat<(f64 (int_arm_cde_vcx3a timm:$coproc, (f64 DPR:$acc), (f64 DPR:$n),
+ (f64 DPR:$m), timm:$imm)),
+ (f64 (CDE_VCX3A_fpdp p_imm:$coproc, DPR:$acc, DPR:$n, DPR:$m,
+ imm_3b:$imm))>;
+}
diff --git a/llvm/test/CodeGen/Thumb2/cde-vfp.ll b/llvm/test/CodeGen/Thumb2/cde-vfp.ll
new file mode 100644
index 000000000000..54ee1d516661
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/cde-vfp.ll
@@ -0,0 +1,198 @@
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+fp-armv8d16sp -verify-machineinstrs -o - %s | FileCheck %s
+
+declare float @llvm.arm.cde.vcx1.f32(i32 immarg, i32 immarg)
+declare float @llvm.arm.cde.vcx1a.f32(i32 immarg, float, i32 immarg)
+declare float @llvm.arm.cde.vcx2.f32(i32 immarg, float, i32 immarg)
+declare float @llvm.arm.cde.vcx2a.f32(i32 immarg, float, float, i32 immarg)
+declare float @llvm.arm.cde.vcx3.f32(i32 immarg, float, float, i32 immarg)
+declare float @llvm.arm.cde.vcx3a.f32(i32 immarg, float, float, float, i32 immarg)
+
+declare double @llvm.arm.cde.vcx1.f64(i32 immarg, i32 immarg)
+declare double @llvm.arm.cde.vcx1a.f64(i32 immarg, double, i32 immarg)
+declare double @llvm.arm.cde.vcx2.f64(i32 immarg, double, i32 immarg)
+declare double @llvm.arm.cde.vcx2a.f64(i32 immarg, double, double, i32 immarg)
+declare double @llvm.arm.cde.vcx3.f64(i32 immarg, double, double, i32 immarg)
+declare double @llvm.arm.cde.vcx3a.f64(i32 immarg, double, double, double, i32 immarg)
+
+define arm_aapcs_vfpcc i32 @test_vcx1_u32() {
+; CHECK-LABEL: test_vcx1_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcx1 p0, s0, #11
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = call float @llvm.arm.cde.vcx1.f32(i32 0, i32 11)
+ %1 = bitcast float %0 to i32
+ ret i32 %1
+}
+
+define arm_aapcs_vfpcc i32 @test_vcx1a_u32(i32 %acc) {
+; CHECK-LABEL: test_vcx1a_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcx1a p1, s0, #12
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i32 %acc to float
+ %1 = call float @llvm.arm.cde.vcx1a.f32(i32 1, float %0, i32 12)
+ %2 = bitcast float %1 to i32
+ ret i32 %2
+}
+
+define arm_aapcs_vfpcc i32 @test_vcx2_u32(i32 %n) {
+; CHECK-LABEL: test_vcx2_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcx2 p0, s0, s0, #21
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i32 %n to float
+ %1 = call float @llvm.arm.cde.vcx2.f32(i32 0, float %0, i32 21)
+ %2 = bitcast float %1 to i32
+ ret i32 %2
+}
+
+define arm_aapcs_vfpcc i32 @test_vcx2a_u32(i32 %acc, i32 %n) {
+; CHECK-LABEL: test_vcx2a_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcx2a p0, s2, s0, #22
+; CHECK-NEXT: vmov r0, s2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i32 %acc to float
+ %1 = bitcast i32 %n to float
+ %2 = call float @llvm.arm.cde.vcx2a.f32(i32 0, float %0, float %1, i32 22)
+ %3 = bitcast float %2 to i32
+ ret i32 %3
+}
+
+define arm_aapcs_vfpcc i32 @test_vcx3_u32(i32 %n, i32 %m) {
+; CHECK-LABEL: test_vcx3_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r1
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcx3 p1, s0, s2, s0, #3
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i32 %n to float
+ %1 = bitcast i32 %m to float
+ %2 = call float @llvm.arm.cde.vcx3.f32(i32 1, float %0, float %1, i32 3)
+ %3 = bitcast float %2 to i32
+ ret i32 %3
+}
+
+define arm_aapcs_vfpcc i32 @test_vcx3a_u32(i32 %acc, i32 %n, i32 %m) {
+; CHECK-LABEL: test_vcx3a_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov s0, r2
+; CHECK-NEXT: vmov s2, r1
+; CHECK-NEXT: vmov s4, r0
+; CHECK-NEXT: vcx3a p0, s4, s2, s0, #5
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i32 %acc to float
+ %1 = bitcast i32 %n to float
+ %2 = bitcast i32 %m to float
+ %3 = call float @llvm.arm.cde.vcx3a.f32(i32 0, float %0, float %1, float %2, i32 5)
+ %4 = bitcast float %3 to i32
+ ret i32 %4
+}
+
+define arm_aapcs_vfpcc i64 @test_vcx1d_u64() {
+; CHECK-LABEL: test_vcx1d_u64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcx1 p0, d0, #11
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = call double @llvm.arm.cde.vcx1.f64(i32 0, i32 11)
+ %1 = bitcast double %0 to i64
+ ret i64 %1
+}
+
+define arm_aapcs_vfpcc i64 @test_vcx1da_u64(i64 %acc) {
+; CHECK-LABEL: test_vcx1da_u64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: vcx1a p1, d0, #12
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i64 %acc to double
+ %1 = call double @llvm.arm.cde.vcx1a.f64(i32 1, double %0, i32 12)
+ %2 = bitcast double %1 to i64
+ ret i64 %2
+}
+
+define arm_aapcs_vfpcc i64 @test_vcx2d_u64(i64 %n) {
+; CHECK-LABEL: test_vcx2d_u64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov d0, r0, r1
+; CHECK-NEXT: vcx2 p0, d0, d0, #21
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i64 %n to double
+ %1 = call double @llvm.arm.cde.vcx2.f64(i32 0, double %0, i32 21)
+ %2 = bitcast double %1 to i64
+ ret i64 %2
+}
+
+define arm_aapcs_vfpcc i64 @test_vcx2da_u64(i64 %acc, i64 %n) {
+; CHECK-LABEL: test_vcx2da_u64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov d0, r2, r3
+; CHECK-NEXT: vmov d1, r0, r1
+; CHECK-NEXT: vcx2a p0, d1, d0, #22
+; CHECK-NEXT: vmov r0, r1, d1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i64 %acc to double
+ %1 = bitcast i64 %n to double
+ %2 = call double @llvm.arm.cde.vcx2a.f64(i32 0, double %0, double %1, i32 22)
+ %3 = bitcast double %2 to i64
+ ret i64 %3
+}
+
+define arm_aapcs_vfpcc i64 @test_vcx3d_u64(i64 %n, i64 %m) {
+; CHECK-LABEL: test_vcx3d_u64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov d0, r2, r3
+; CHECK-NEXT: vmov d1, r0, r1
+; CHECK-NEXT: vcx3 p1, d0, d1, d0, #3
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: bx lr
+entry:
+ %0 = bitcast i64 %n to double
+ %1 = bitcast i64 %m to double
+ %2 = call double @llvm.arm.cde.vcx3.f64(i32 1, double %0, double %1, i32 3)
+ %3 = bitcast double %2 to i64
+ ret i64 %3
+}
+
+define arm_aapcs_vfpcc i64 @test_vcx3da_u64(i64 %acc, i64 %n, i64 %m) {
+; CHECK-LABEL: test_vcx3da_u64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: ldrd lr, r12, [sp, #8]
+; CHECK-DAG: vmov [[D0:d.*]], r0, r1
+; CHECK-DAG: vmov [[D1:d.*]], r2, r3
+; CHECK-DAG: vmov [[D2:d.*]], lr, r12
+; CHECK-NEXT: vcx3a p0, [[D0]], [[D1]], [[D2]], #5
+; CHECK-NEXT: vmov r0, r1, [[D0]]
+; CHECK-NEXT: pop {r7, pc}
+entry:
+ %0 = bitcast i64 %acc to double
+ %1 = bitcast i64 %n to double
+ %2 = bitcast i64 %m to double
+ %3 = call double @llvm.arm.cde.vcx3a.f64(i32 0, double %0, double %1, double %2, i32 5)
+ %4 = bitcast double %3 to i64
+ ret i64 %4
+}
More information about the cfe-commits mailing list