[clang] [Clang][AArch64] Add customisable immediate range checking to NEON (PR #100278)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 24 04:33:56 PDT 2024
https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/100278
>From 5f4790180ced9cf3b66589106017d301772fb393 Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 23 Jul 2024 08:38:32 +0000
Subject: [PATCH 1/3] Rebase to resolve arm_neon.td conflict
---
clang/include/clang/Basic/TargetBuiltins.h | 39 +-
clang/include/clang/Basic/arm_fp16.td | 2 +-
.../include/clang/Basic/arm_immcheck_incl.td | 39 ++
clang/include/clang/Basic/arm_neon.td | 374 ++++++++++++------
clang/include/clang/Basic/arm_neon_incl.td | 11 +-
clang/include/clang/Basic/arm_sve_sme_incl.td | 36 +-
clang/include/clang/Sema/SemaARM.h | 3 +
clang/lib/Sema/SemaARM.cpp | 151 ++++---
clang/test/CodeGen/aarch64-neon-vcmla.c | 60 ++-
clang/test/Sema/aarch64-neon-vcmla-ranges.c | 202 ++++++++++
clang/utils/TableGen/NeonEmitter.cpp | 133 +++----
clang/utils/TableGen/SveEmitter.cpp | 2 +-
12 files changed, 700 insertions(+), 352 deletions(-)
create mode 100644 clang/include/clang/Basic/arm_immcheck_incl.td
create mode 100644 clang/test/Sema/aarch64-neon-vcmla-ranges.c
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 4333830bf34f2..50e17ad7e1628 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -209,15 +209,45 @@ namespace clang {
Flags |= QuadFlag;
}
- EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
+ EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
bool isPoly() const {
EltType ET = getEltType();
return ET == Poly8 || ET == Poly16 || ET == Poly64;
}
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
- bool isQuad() const { return (Flags & QuadFlag) != 0; }
+ bool isQuad() const { return (Flags & QuadFlag) != 0; };
+ unsigned getEltSizeInBits() const {
+ switch(getEltType()){
+ case Int8:
+ case Poly8:
+ return 8;
+ case Int16:
+ case Float16:
+ case Poly16:
+ case BFloat16:
+ return 16;
+ case Int32:
+ case Float32:
+ return 32;
+ case Int64:
+ case Float64:
+ case Poly64:
+ return 64;
+ case Poly128:
+ return 128;
+ default:
+ llvm_unreachable("Invalid NeonTypeFlag!");
+ }
+ }
};
+ // Shared between SVE/SME and NEON
+ enum ArmImmCheckType {
+#define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+#include "clang/Basic/arm_sve_typeflags.inc"
+#undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+ };
+
/// Flags to identify the types for overloaded SVE builtins.
class SVETypeFlags {
uint64_t Flags;
@@ -249,11 +279,6 @@ namespace clang {
#undef LLVM_GET_SVE_MERGETYPES
};
- enum ImmCheckType {
-#define LLVM_GET_SVE_IMMCHECKTYPES
-#include "clang/Basic/arm_sve_typeflags.inc"
-#undef LLVM_GET_SVE_IMMCHECKTYPES
- };
SVETypeFlags(uint64_t F) : Flags(F) {
EltTypeShift = llvm::countr_zero(EltTypeMask);
diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td
index d36b4617bef5d..42228a3ba1ffa 100644
--- a/clang/include/clang/Basic/arm_fp16.td
+++ b/clang/include/clang/Basic/arm_fp16.td
@@ -76,7 +76,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "(1U)1", "Sh">;
def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
- let isVCVT_N = 1 in {
+ let isVCVT_N = 1, ImmChecks = [ImmCheck<1, ImmCheck1_16>] in {
def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
diff --git a/clang/include/clang/Basic/arm_immcheck_incl.td b/clang/include/clang/Basic/arm_immcheck_incl.td
new file mode 100644
index 0000000000000..3b20248f65040
--- /dev/null
+++ b/clang/include/clang/Basic/arm_immcheck_incl.td
@@ -0,0 +1,39 @@
+class ImmCheckType<int val> {
+ int Value = val;
+}
+
+// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
+def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns)
+def ImmCheck1_16 : ImmCheckType<1>; // 1..16
+def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1)
+def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt)
+def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2
+def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1)
+def ImmCheck0_7 : ImmCheckType<6>; // 0..7
+def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1)
+def ImmCheckLaneQIndex : ImmCheckType<8>; // (Neon) treat type as Quad
+def ImmCheckLaneIndexCompRotate : ImmCheckType<9>; // 0..(128/(2*sizeinbits(elt)) - 1)
+def ImmCheckLaneIndexDot : ImmCheckType<10>; // 0..(128/(4*sizeinbits(elt)) - 1)
+def ImmCheckComplexRot90_270 : ImmCheckType<11>; // [90,270]
+def ImmCheckComplexRotAll90 : ImmCheckType<12>; // [0, 90, 180,270]
+def ImmCheck0_13 : ImmCheckType<13>; // 0..13
+def ImmCheck0_1 : ImmCheckType<14>; // 0..1
+def ImmCheck0_2 : ImmCheckType<15>; // 0..2
+def ImmCheck0_3 : ImmCheckType<16>; // 0..3
+def ImmCheck0_0 : ImmCheckType<17>; // 0..0
+def ImmCheck0_15 : ImmCheckType<18>; // 0..15
+def ImmCheck0_255 : ImmCheckType<19>; // 0..255
+def ImmCheck2_4_Mul2 : ImmCheckType<20>; // 2, 4
+def ImmCheck1_1 : ImmCheckType<21>; // 1..1
+def ImmCheck1_3 : ImmCheckType<22>; // 1..3
+def ImmCheck1_7 : ImmCheckType<23>; // 1..7
+def ImmCheck1_32 : ImmCheckType<24>; // 1..32
+def ImmCheck1_64 : ImmCheckType<25>; // 1..64
+def ImmCheck0_63 : ImmCheckType<26>; // 0..63
+
+class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
+ int Arg = arg;
+  // The index of the argument whose type should be referred to when validating this immediate.
+ int EltSizeArg = eltSizeArg;
+ ImmCheckType Kind = kind;
+}
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 3098fa67e6a51..ee823f6ef6813 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -284,14 +284,18 @@ def OP_CVT_F32_BF16
// Splat operation - performs a range-checked splat over a vector
def SPLAT : WInst<"splat_lane", ".(!q)I",
- "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
+ "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
- "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
+ "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl",
+ [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
let isLaneQ = 1;
}
let TargetGuard = "bf16,neon" in {
- def SPLAT_BF : WInst<"splat_lane", ".(!q)I", "bQb">;
- def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
+ def SPLAT_BF : WInst<"splat_lane", ".(!q)I", "bQb",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+ def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb",
+ [ImmCheck<1, ImmCheckLaneQIndex, 1>]> {
let isLaneQ = 1;
}
}
@@ -401,27 +405,45 @@ def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
////////////////////////////////////////////////////////////////////////////////
// E.3.12 Shifts by constant
let isShift = 1 in {
-def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">;
-def VSHRN_N : IInst<"vshrn_n", "<QI", "silUsUiUl">;
-def VQSHRUN_N : SInst<"vqshrun_n", "(<U)QI", "sil">;
-def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">;
-def VQSHRN_N : SInst<"vqshrn_n", "<QI", "silUsUiUl">;
-def VRSHRN_N : IInst<"vrshrn_n", "<QI", "silUsUiUl">;
-def VQRSHRN_N : SInst<"vqrshrn_n", "<QI", "silUsUiUl">;
-def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">;
+
+
+def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+ [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+ [ImmCheck<2, ImmCheckShiftRight>]>;
+def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+ [ImmCheck<2, ImmCheckShiftRight>]>;
+def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl",
+ [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl",
+ [ImmCheck<1, ImmCheckShiftLeft>]>;
+def VSHRN_N : IInst<"vshrn_n", "<QI", "silUsUiUl",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRUN_N : SInst<"vqshrun_n", "(<U)QI", "sil",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQSHRN_N : SInst<"vqshrn_n", "<QI", "silUsUiUl",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VRSHRN_N : IInst<"vrshrn_n", "<QI", "silUsUiUl",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VQRSHRN_N : SInst<"vqrshrn_n", "<QI", "silUsUiUl",
+ [ImmCheck<1, ImmCheckShiftRight>]>;
+def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi",
+ [ImmCheck<1, ImmCheckShiftLeft>]>;
////////////////////////////////////////////////////////////////////////////////
// E.3.13 Shifts with insert
def VSRI_N : WInst<"vsri_n", "...I",
- "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+ "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
+ [ImmCheck<2, ImmCheckShiftRight>]>;
def VSLI_N : WInst<"vsli_n", "...I",
- "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
+ "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs",
+ [ImmCheck<2, ImmCheckShiftLeft>]>;
}
////////////////////////////////////////////////////////////////////////////////
@@ -435,7 +457,8 @@ def VLD1_X3 : WInst<"vld1_x3", "3(c*!)",
def VLD1_X4 : WInst<"vld1_x4", "4(c*!)",
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
- "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
def VLD1_DUP : WInst<"vld1_dup", ".(c*!)",
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
def VST1 : WInst<"vst1", "v*(.!)",
@@ -447,19 +470,23 @@ def VST1_X3 : WInst<"vst1_x3", "v*(3!)",
def VST1_X4 : WInst<"vst1_x4", "v*(4!)",
"cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
- "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+
let ArchGuard = "(__ARM_FP & 2)" in {
def VLD1_F16 : WInst<"vld1", ".(c*!)", "hQh">;
def VLD1_X2_F16 : WInst<"vld1_x2", "2(c*!)", "hQh">;
def VLD1_X3_F16 : WInst<"vld1_x3", "3(c*!)", "hQh">;
def VLD1_X4_F16 : WInst<"vld1_x4", "4(c*!)", "hQh">;
-def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">;
+def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh",
+ [ImmCheck<2, ImmCheck0_3, 1>]>;
def VLD1_DUP_F16 : WInst<"vld1_dup", ".(c*!)", "hQh">;
def VST1_F16 : WInst<"vst1", "v*(.!)", "hQh">;
def VST1_X2_F16 : WInst<"vst1_x2", "v*(2!)", "hQh">;
def VST1_X3_F16 : WInst<"vst1_x3", "v*(3!)", "hQh">;
def VST1_X4_F16 : WInst<"vst1_x4", "v*(4!)", "hQh">;
-def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">;
+def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh",
+ [ImmCheck<2, ImmCheck0_3, 1>]>;
}
////////////////////////////////////////////////////////////////////////////////
@@ -473,15 +500,21 @@ def VLD3_DUP : WInst<"vld3_dup", "3(c*!)",
"UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
def VLD4_DUP : WInst<"vld4_dup", "4(c*!)",
"UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
-def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
-def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
+def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
let ArchGuard = "(__ARM_FP & 2)" in {
def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">;
def VLD3_F16 : WInst<"vld3", "3(c*!)", "hQh">;
@@ -489,28 +522,36 @@ def VLD4_F16 : WInst<"vld4", "4(c*!)", "hQh">;
def VLD2_DUP_F16 : WInst<"vld2_dup", "2(c*!)", "hQh">;
def VLD3_DUP_F16 : WInst<"vld3_dup", "3(c*!)", "hQh">;
def VLD4_DUP_F16 : WInst<"vld4_dup", "4(c*!)", "hQh">;
-def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">;
-def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">;
-def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">;
+def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh",
+ [ImmCheck<4, ImmCheck0_3, 1>]>;
+def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh",
+ [ImmCheck<5, ImmCheck0_3, 1>]>;
+def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh",
+ [ImmCheck<6, ImmCheck0_3, 1>]>;
def VST2_F16 : WInst<"vst2", "v*(2!)", "hQh">;
def VST3_F16 : WInst<"vst3", "v*(3!)", "hQh">;
def VST4_F16 : WInst<"vst4", "v*(4!)", "hQh">;
-def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">;
-def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">;
-def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">;
+def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh",
+ [ImmCheck<3, ImmCheck0_3, 1>]>;
+def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh",
+ [ImmCheck<4, ImmCheck0_3, 1>]>;
+def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh",
+ [ImmCheck<5, ImmCheck0_3, 1>]>;
}
////////////////////////////////////////////////////////////////////////////////
// E.3.16 Extract lanes from a vector
let InstName = "vmov" in
def VGET_LANE : IInst<"vget_lane", "1.I",
- "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+ "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
////////////////////////////////////////////////////////////////////////////////
// E.3.17 Set lanes within a vector
let InstName = "vmov" in
def VSET_LANE : IInst<"vset_lane", ".1.I",
- "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
+ "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
////////////////////////////////////////////////////////////////////////////////
// E.3.18 Initialize a vector from bit pattern
@@ -560,9 +601,12 @@ def VCVT_S32 : SInst<"vcvt_s32", "S.", "fQf">;
def VCVT_U32 : SInst<"vcvt_u32", "U.", "fQf">;
def VCVT_F32 : SInst<"vcvt_f32", "F(.!)", "iUiQiQUi">;
let isVCVT_N = 1 in {
-def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf">;
-def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf">;
-def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">;
+def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf",
+ [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf",
+ [ImmCheck<1, ImmCheck1_32>]>;
+def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi",
+ [ImmCheck<1, ImmCheck1_32>]>;
}
def VMOVN : IInst<"vmovn", "<Q", "silUsUiUl">;
@@ -610,8 +654,10 @@ def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
}
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
-def A64_VQDMULH_LANE : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
-def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
+def A64_VQDMULH_LANE : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi",
+ [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
+def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi",
+ [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
}
let TargetGuard = "v8.1a,neon" in {
@@ -629,7 +675,8 @@ def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
////////////////////////////////////////////////////////////////////////////////
// E.3.26 Vector Extract
def VEXT : WInst<"vext", "...I",
- "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">;
+ "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf",
+ [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
////////////////////////////////////////////////////////////////////////////////
// E.3.27 Reverse vector elements
@@ -738,14 +785,22 @@ def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
-def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">;
-def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">;
-def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
-def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
+def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl",
+ [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl",
+ [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl",
+ [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
+ [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
+ [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl",
+ [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
def LD1_DUP : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
def LD2_DUP : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">;
@@ -901,8 +956,8 @@ def SHLL_HIGH_N : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
OP_LONG_HI>;
////////////////////////////////////////////////////////////////////////////////
-def SRI_N : WInst<"vsri_n", "...I", "PlQPl">;
-def SLI_N : WInst<"vsli_n", "...I", "PlQPl">;
+def SRI_N : WInst<"vsri_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftRight, 1>]>;
+def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 1>]>;
// Right shift narrow high
def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(<q).I",
@@ -924,9 +979,12 @@ def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "<(<q).I",
def VMOVL_HIGH : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
let isVCVT_N = 1 in {
-def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">;
-def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">;
-def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">;
+def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl",
+ [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd",
+ [ImmCheck<1, ImmCheck1_64>]>;
+def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd",
+ [ImmCheck<1, ImmCheck1_64>]>;
}
////////////////////////////////////////////////////////////////////////////////
@@ -965,8 +1023,10 @@ let TargetGuard = "aes,neon" in {
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
-def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">;
-def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">;
+def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
"csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
@@ -1011,8 +1071,10 @@ def VMLS_LANEQ : IOpInst<"vmls_laneq", "...QI",
let isLaneQ = 1;
}
-def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd">;
-def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd"> {
+def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd",
+ [ImmCheck<3, ImmCheckLaneIndex, 0>]>;
+def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd",
+ [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
let isLaneQ = 1;
}
def VFMS_LANE : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
@@ -1088,8 +1150,10 @@ def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
}
let isLaneQ = 1 in {
-def VQDMULH_LANEQ : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
-def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
+def VQDMULH_LANEQ : SInst<"vqdmulh_laneq", "..QI", "siQsQi",
+ [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
+def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi",
+ [ImmCheck<2, ImmCheckLaneQIndex, 1>]>;
}
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v8.1a,neon" in {
def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
@@ -1118,7 +1182,8 @@ def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">;
////////////////////////////////////////////////////////////////////////////////
// Newly added Vector Extract for f64
-def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
+def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl",
+ [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
////////////////////////////////////////////////////////////////////////////////
// Crypto
@@ -1149,7 +1214,7 @@ def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
def RAX1 : SInst<"vrax1", "...", "QUl">;
let isVXAR = 1 in {
-def XAR : SInst<"vxar", "...I", "QUl">;
+def XAR : SInst<"vxar", "...I", "QUl", [ImmCheck<2, ImmCheck0_63>]>;
}
}
@@ -1162,10 +1227,10 @@ def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "sm4,neon" in {
def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
-def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
-def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
-def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi">;
-def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi">;
+def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
+def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi", [ImmCheck<3, ImmCheck0_3>]>;
def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
}
@@ -1327,49 +1392,68 @@ def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
// Scalar Shift (Immediate)
let isScalarShift = 1 in {
// Signed/Unsigned Shift Right (Immediate)
-def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">;
+def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl",
+ [ImmCheck<1, ImmCheckShiftRight, 0>]>;
// Signed/Unsigned Rounding Shift Right (Immediate)
-def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">;
+def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl",
+ [ImmCheck<1, ImmCheckShiftRight, 0>]>;
// Signed/Unsigned Shift Right and Accumulate (Immediate)
-def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">;
+def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl",
+ [ImmCheck<2, ImmCheckShiftRight, 0>]>;
// Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
-def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">;
+def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl",
+ [ImmCheck<2, ImmCheckShiftRight, 0>]>;
// Shift Left (Immediate)
-def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">;
+def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl",
+ [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
// Signed/Unsigned Saturating Shift Left (Immediate)
-def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">;
+def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl",
+ [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
// Signed Saturating Shift Left Unsigned (Immediate)
-def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">;
+def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl",
+ [ImmCheck<1, ImmCheckShiftLeft, 0>]>;
// Shift Right And Insert (Immediate)
-def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">;
+def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl",
+ [ImmCheck<2, ImmCheckShiftRight, 0>]>;
// Shift Left And Insert (Immediate)
-def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">;
+def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl",
+ [ImmCheck<2, ImmCheckShiftLeft, 0>]>;
let isScalarNarrowShift = 1 in {
// Signed/Unsigned Saturating Shift Right Narrow (Immediate)
- def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+ def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
+ [ImmCheck<1, ImmCheckShiftRight, 0>]>;
// Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
- def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
+ def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl",
+ [ImmCheck<1, ImmCheckShiftRight, 0>]>;
// Signed Saturating Shift Right Unsigned Narrow (Immediate)
- def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl">;
+ def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<U)1I", "SsSiSl",
+ [ImmCheck<1, ImmCheckShiftRight, 0>]>;
// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
- def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl">;
+ def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<U)1I", "SsSiSl",
+ [ImmCheck<1, ImmCheckShiftRight, 0>]>;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
-def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">;
-def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">;
+def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi",
+ [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl",
+ [ImmCheck<1, ImmCheck1_64>]>;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
-def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">;
-def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">;
-def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">;
-def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">;
+def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf",
+ [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf",
+ [ImmCheck<1, ImmCheck1_32>]>;
+def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd",
+ [ImmCheck<1, ImmCheck1_64>]>;
+def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd",
+ [ImmCheck<1, ImmCheck1_64>]>;
}
////////////////////////////////////////////////////////////////////////////////
@@ -1575,10 +1659,12 @@ def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_L
def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
// VMUL_LANE_A64 d type implemented using scalar mul lane
-def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">;
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d",
+ [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
// VMUL_LANEQ d type implemented using scalar mul lane
-def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d"> {
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d",
+ [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
let isLaneQ = 1;
}
@@ -1591,8 +1677,10 @@ def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ
}
// Scalar Floating Point fused multiply-add (scalar, by element)
-def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">;
-def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd"> {
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd",
+ [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd",
+ [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
let isLaneQ = 1;
}
@@ -1609,14 +1697,18 @@ def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR
}
// Signed Saturating Doubling Multiply-Add Long (scalar by element)
-def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi"> {
+def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi",
+ [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi",
+ [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
let isLaneQ = 1;
}
// Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
-def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">;
-def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi"> {
+def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi",
+ [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi",
+ [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
let isLaneQ = 1;
}
@@ -1646,8 +1738,10 @@ def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR
}
} // TargetGuard = "v8.1a"
-def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
-def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
+def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs",
+ [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
let isLaneQ = 1;
}
@@ -1720,9 +1814,12 @@ let TargetGuard = "fullfp16,neon" in {
// Vector conversion
let isVCVT_N = 1 in {
- def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">;
- def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">;
- def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">;
+ def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs",
+ [ImmCheck<1, ImmCheck1_16>]>;
+ def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh",
+ [ImmCheck<1, ImmCheck1_16>]>;
+ def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh",
+ [ImmCheck<1, ImmCheck1_16>]>;
}
// Max/Min
@@ -1770,7 +1867,7 @@ def VZIPH : WInst<"vzip", "2..", "hQh">;
def VUZPH : WInst<"vuzp", "2..", "hQh">;
def VTRNH : WInst<"vtrn", "2..", "hQh">;
// Vector Extract
-def VEXTH : WInst<"vext", "...I", "hQh">;
+def VEXTH : WInst<"vext", "...I", "hQh", [ImmCheck<2, ImmCheckLaneIndex, 0>]>;
// Reverse vector elements
def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
@@ -1801,16 +1898,20 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
// ARMv8.2-A FP16 lane vector intrinsics.
// FMA lane
- def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh">;
- def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh"> {
+ def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh",
+ [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+ def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh",
+ [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
let isLaneQ = 1;
}
// FMA lane with scalar argument
def FMLA_NH : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
// Scalar floating point fused multiply-add (scalar, by element)
- def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh">;
- def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh"> {
+ def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh",
+ [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+ def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh",
+ [ImmCheck<3, ImmCheckLaneQIndex, 2>]> {
let isLaneQ = 1;
}
@@ -1844,8 +1945,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f
}
def VMULX_NH : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
// Scalar floating point mulx (scalar, by element)
- def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">;
- def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh"> {
+ def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+ def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh",
+ [ImmCheck<2, ImmCheckLaneQIndex, 1>]> {
let isLaneQ = 1;
}
@@ -1865,8 +1968,10 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" in {
def VZIP2H : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>;
def VUZP2H : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>;
- def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh">;
- def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh"> {
+ def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+ def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh",
+ [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
let isLaneQ = 1;
}
}
@@ -1959,9 +2064,12 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
let isLaneQ = 1 in {
// vcmla{ROT}_laneq
+ // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an immediate range of 0 <= lane <= 1.
+ // fp16 is the only variant for which these two differ.
+ // https://developer.arm.com/documentation/ihi0073/latest/
+ defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type, Op<(call "vcmla" # ROT, $p0, $p1,
- (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
-
+ (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>;
// vcmlaq{ROT}_laneq
def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
(bitcast $p0, (dup_typed laneqty , (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
@@ -2011,10 +2119,14 @@ let TargetGuard = "bf16,neon" in {
def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
def VGET_LOW_BF : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
- def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb">;
- def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb">;
- def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb">;
- def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb"> {
+ def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+ def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+ def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb",
+ [ImmCheck<1, ImmCheckLaneIndex, 0>]>;
+ def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb",
+ [ImmCheck<1, ImmCheckLaneQIndex, 0>]> {
let isLaneQ = 1;
}
@@ -2036,14 +2148,22 @@ let TargetGuard = "bf16,neon" in {
def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
- def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb">;
- def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb">;
- def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb">;
- def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb">;
- def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb">;
- def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb">;
- def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb">;
- def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb">;
+ def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+ def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb",
+ [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+ def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb",
+ [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
+ def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb",
+ [ImmCheck<6, ImmCheckLaneIndex, 1>]>;
+ def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+ def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb",
+ [ImmCheck<3, ImmCheckLaneIndex, 1>]>;
+ def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb",
+ [ImmCheck<4, ImmCheckLaneIndex, 1>]>;
+ def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb",
+ [ImmCheck<5, ImmCheckLaneIndex, 1>]>;
def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">;
@@ -2093,6 +2213,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b
// v8.9a/v9.4a LRCPC3 intrinsics
let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "rcpc3,neon" in {
- def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">;
- def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">;
+ def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+ def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl",
+ [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
}
diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td
index 3b8015daee6d9..2b5acd41e7bbd 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -21,6 +21,8 @@
//
//===----------------------------------------------------------------------===//
+include "arm_immcheck_incl.td"
+
// The base Operation class. All operations must subclass this.
class Operation<list<dag> ops=[]> {
list<dag> Ops = ops;
@@ -260,7 +262,7 @@ def OP_UNAVAILABLE : Operation {
// Every intrinsic subclasses Inst.
-class Inst <string n, string p, string t, Operation o> {
+class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
string Name = n;
string Prototype = p;
string Types = t;
@@ -278,6 +280,7 @@ class Inst <string n, string p, string t, Operation o> {
// a Q register. Only used for intrinsics which end up calling polymorphic
// builtins.
bit isLaneQ = 0;
+ list<ImmCheck> ImmChecks = ch;
// Certain intrinsics have different names than their representative
// instructions. This field allows us to handle this correctly when we
@@ -300,9 +303,9 @@ class Inst <string n, string p, string t, Operation o> {
// SInst: Instruction with signed/unsigned suffix (e.g., "s8", "u8", "p8")
// IInst: Instruction with generic integer suffix (e.g., "i8")
// WInst: Instruction with only bit size suffix (e.g., "8")
-class SInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class IInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
-class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
+class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
+class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 6ec357825a132..fdf4ba55fe938 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -13,6 +13,8 @@
//
//===----------------------------------------------------------------------===//
+include "arm_immcheck_incl.td"
+
//===----------------------------------------------------------------------===//
// Instruction definitions
//===----------------------------------------------------------------------===//
@@ -233,40 +235,6 @@ def IsInZT0 : FlagType<0x400000000000>;
def IsOutZT0 : FlagType<0x800000000000>;
def IsInOutZT0 : FlagType<0x1000000000000>;
-// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
-class ImmCheckType<int val> {
- int Value = val;
-}
-def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns)
-def ImmCheck1_16 : ImmCheckType<1>; // 1..16
-def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1)
-def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt)
-def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2
-def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1)
-def ImmCheck0_7 : ImmCheckType<6>; // 0..7
-def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1)
-def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1)
-def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270]
-def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270]
-def ImmCheck0_13 : ImmCheckType<12>; // 0..13
-def ImmCheck0_1 : ImmCheckType<13>; // 0..1
-def ImmCheck0_2 : ImmCheckType<14>; // 0..2
-def ImmCheck0_3 : ImmCheckType<15>; // 0..3
-def ImmCheck0_0 : ImmCheckType<16>; // 0..0
-def ImmCheck0_15 : ImmCheckType<17>; // 0..15
-def ImmCheck0_255 : ImmCheckType<18>; // 0..255
-def ImmCheck2_4_Mul2 : ImmCheckType<19>; // 2, 4
-def ImmCheck1_1 : ImmCheckType<20>; // 1..1
-def ImmCheck1_3 : ImmCheckType<21>; // 1..3
-def ImmCheck1_7 : ImmCheckType<22>; // 1..7
-
-class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
- int Arg = arg;
- int EltSizeArg = eltSizeArg;
- ImmCheckType Kind = kind;
-}
-
defvar InvalidMode = "";
class Inst<string n, string p, string t, MergeType mt, string i,
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index fedc7df7908f1..1ced84300c179 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -41,6 +41,9 @@ class SemaARM : public SemaBase {
unsigned MaxWidth);
bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
+ bool ParseNeonImmChecks(CallExpr *TheCall,
+ SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+ int OverloadType);
bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index d8dd4fe16e3af..8f4d94e1df678 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -314,40 +314,6 @@ bool SemaARM::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
return false;
}
-// Get the valid immediate range for the specified NEON type code.
-static unsigned RFT(unsigned t, bool shift = false, bool ForceQuad = false) {
- NeonTypeFlags Type(t);
- int IsQuad = ForceQuad ? true : Type.isQuad();
- switch (Type.getEltType()) {
- case NeonTypeFlags::Int8:
- case NeonTypeFlags::Poly8:
- return shift ? 7 : (8 << IsQuad) - 1;
- case NeonTypeFlags::Int16:
- case NeonTypeFlags::Poly16:
- return shift ? 15 : (4 << IsQuad) - 1;
- case NeonTypeFlags::Int32:
- return shift ? 31 : (2 << IsQuad) - 1;
- case NeonTypeFlags::Int64:
- case NeonTypeFlags::Poly64:
- return shift ? 63 : (1 << IsQuad) - 1;
- case NeonTypeFlags::Poly128:
- return shift ? 127 : (1 << IsQuad) - 1;
- case NeonTypeFlags::Float16:
- assert(!shift && "cannot shift float types!");
- return (4 << IsQuad) - 1;
- case NeonTypeFlags::Float32:
- assert(!shift && "cannot shift float types!");
- return (2 << IsQuad) - 1;
- case NeonTypeFlags::Float64:
- assert(!shift && "cannot shift float types!");
- return (1 << IsQuad) - 1;
- case NeonTypeFlags::BFloat16:
- assert(!shift && "cannot shift float types!");
- return (4 << IsQuad) - 1;
- }
- llvm_unreachable("Invalid NeonTypeFlag!");
-}
-
/// getNeonEltType - Return the QualType corresponding to the elements of
/// the vector type specified by the NeonTypeFlags. This is used to check
/// the pointer arguments for Neon load/store intrinsics.
@@ -403,6 +369,62 @@ enum ArmSMEState : unsigned {
ArmZT0Mask = 0b11 << 2
};
+bool SemaARM::ParseNeonImmChecks(CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2>
+ &ImmChecks, int OverloadType = -1) {
+ int ArgIdx, CheckTy, ElementType;
+ bool hasError = false;
+
+ for (auto &I : ImmChecks) {
+ std::tie(ArgIdx, CheckTy, ElementType) = I;
+
+ NeonTypeFlags Type = (OverloadType != -1) ?
+ NeonTypeFlags(OverloadType) : NeonTypeFlags(ElementType);
+
+ switch((ArmImmCheckType)CheckTy) {
+ case ArmImmCheckType::ImmCheck0_3:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
+ break;
+ case ArmImmCheckType::ImmCheck0_63:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
+ break;
+ case ArmImmCheckType::ImmCheck0_7:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
+ break;
+ case ArmImmCheckType::ImmCheck1_16:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
+ break;
+ case ArmImmCheckType::ImmCheck1_32:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
+ break;
+ case ArmImmCheckType::ImmCheck1_64:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
+ break;
+ case ArmImmCheckType::ImmCheckLaneIndex:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, (64 << Type.isQuad()) /
+ Type.getEltSizeInBits() - 1);
+ break;
+ case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+ (128/Type.getEltSizeInBits()) - 1);
+ break;
+ case ArmImmCheckType::ImmCheckShiftLeft:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+ Type.getEltSizeInBits() - 1);
+ break;
+ case ArmImmCheckType::ImmCheckShiftRight:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx,
+ 1, Type.getEltSizeInBits());
+ break;
+ default:
+ llvm_unreachable("Invalid Neon immediate range typeflag!");
+ break;
+ }
+ }
+
+ return hasError;
+}
+
+
bool SemaARM::ParseSVEImmChecks(
CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
// Perform all the immediate checks for this builtin call.
@@ -432,76 +454,76 @@ bool SemaARM::ParseSVEImmChecks(
return false;
};
- switch ((SVETypeFlags::ImmCheckType)CheckTy) {
- case SVETypeFlags::ImmCheck0_31:
+ switch ((ArmImmCheckType)CheckTy) {
+ case ArmImmCheckType::ImmCheck0_31:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_13:
+ case ArmImmCheckType::ImmCheck0_13:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
HasError = true;
break;
- case SVETypeFlags::ImmCheck1_16:
+ case ArmImmCheckType::ImmCheck1_16:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_7:
+ case ArmImmCheckType::ImmCheck0_7:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
HasError = true;
break;
- case SVETypeFlags::ImmCheck1_1:
+ case ArmImmCheckType::ImmCheck1_1:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheck1_3:
+ case ArmImmCheckType::ImmCheck1_3:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 3))
HasError = true;
break;
- case SVETypeFlags::ImmCheck1_7:
+ case ArmImmCheckType::ImmCheck1_7:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
HasError = true;
break;
- case SVETypeFlags::ImmCheckExtract:
+ case ArmImmCheckType::ImmCheckExtract:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
(2048 / ElementSizeInBits) - 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheckShiftRight:
+ case ArmImmCheckType::ImmCheckShiftRight:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
ElementSizeInBits))
HasError = true;
break;
- case SVETypeFlags::ImmCheckShiftRightNarrow:
+ case ArmImmCheckType::ImmCheckShiftRightNarrow:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 1,
ElementSizeInBits / 2))
HasError = true;
break;
- case SVETypeFlags::ImmCheckShiftLeft:
+ case ArmImmCheckType::ImmCheckShiftLeft:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
ElementSizeInBits - 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheckLaneIndex:
+ case ArmImmCheckType::ImmCheckLaneIndex:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
(128 / (1 * ElementSizeInBits)) - 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+ case ArmImmCheckType::ImmCheckLaneIndexCompRotate:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
(128 / (2 * ElementSizeInBits)) - 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheckLaneIndexDot:
+ case ArmImmCheckType::ImmCheckLaneIndexDot:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0,
(128 / (4 * ElementSizeInBits)) - 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheckComplexRot90_270:
+ case ArmImmCheckType::ImmCheckComplexRot90_270:
if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
diag::err_rotation_argument_to_cadd))
HasError = true;
break;
- case SVETypeFlags::ImmCheckComplexRotAll90:
+ case ArmImmCheckType::ImmCheckComplexRotAll90:
if (CheckImmediateInSet(
[](int64_t V) {
return V == 0 || V == 90 || V == 180 || V == 270;
@@ -509,35 +531,38 @@ bool SemaARM::ParseSVEImmChecks(
diag::err_rotation_argument_to_cmla))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_1:
+ case ArmImmCheckType::ImmCheck0_1:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_2:
+ case ArmImmCheckType::ImmCheck0_2:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_3:
+ case ArmImmCheckType::ImmCheck0_3:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_0:
+ case ArmImmCheckType::ImmCheck0_0:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_15:
+ case ArmImmCheckType::ImmCheck0_15:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
HasError = true;
break;
- case SVETypeFlags::ImmCheck0_255:
+ case ArmImmCheckType::ImmCheck0_255:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
HasError = true;
break;
- case SVETypeFlags::ImmCheck2_4_Mul2:
+ case ArmImmCheckType::ImmCheck2_4_Mul2:
if (SemaRef.BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
SemaRef.BuiltinConstantArgMultiple(TheCall, ArgNum, 2))
HasError = true;
break;
+ default:
+ llvm_unreachable("Invalid SVE immediate range typeflag!");
+ break;
}
}
@@ -748,7 +773,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
llvm::APSInt Result;
uint64_t mask = 0;
- unsigned TV = 0;
+ int TV = -1;
int PtrArgNum = -1;
bool HasConstPtr = false;
switch (BuiltinID) {
@@ -800,7 +825,7 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
// For NEON intrinsics which take an immediate value as part of the
// instruction, range check them here.
- unsigned i = 0, l = 0, u = 0;
+ SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
switch (BuiltinID) {
default:
return false;
@@ -808,9 +833,9 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
#include "clang/Basic/arm_fp16.inc"
#include "clang/Basic/arm_neon.inc"
#undef GET_NEON_IMMEDIATE_CHECK
- }
-
- return SemaRef.BuiltinConstantArgRange(TheCall, i, l, u + l);
+ }
+
+ return ParseNeonImmChecks(TheCall, ImmChecks, TV);
}
bool SemaARM::CheckMVEBuiltinFunctionCall(unsigned BuiltinID,
diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index 02171527cc6a3..2ff48fd97b427 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -155,15 +155,14 @@ float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rh
return vcmla_lane_f16(acc, lhs, rhs, 1);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: %vcmla_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
- return vcmla_laneq_f16(acc, lhs, rhs, 3);
+ return vcmla_laneq_f16(acc, lhs, rhs, 1);
}
// CHECK-LABEL: @test_vcmlaq_lane_f16(
@@ -191,7 +190,6 @@ float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rh
return vcmla_lane_f32(acc, lhs, rhs, 0);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -229,15 +227,14 @@ float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x
return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
-// CHECK-LABEL: @test_vcmla_rot90_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: %vcmla_rot90_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK: ret <4 x half> %vcmla_rot90_f163.i
float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
- return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3);
+ return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
}
// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
@@ -265,7 +262,6 @@ float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x
return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -303,15 +299,15 @@ float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16
return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK: %vcmla_rot180_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK: ret <4 x half> %vcmla_rot180_f163.i
float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
- return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3);
+ return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
}
// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
@@ -339,7 +335,6 @@ float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32
return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
@@ -377,15 +372,15 @@ float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16
return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
+// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
+// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK: %vcmla_rot270_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
+// CHECK: ret <4 x half> %vcmla_rot270_f163.i
float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
- return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3);
+ return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
}
// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
@@ -413,7 +408,6 @@ float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32
return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
}
-// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
diff --git a/clang/test/Sema/aarch64-neon-vcmla-ranges.c b/clang/test/Sema/aarch64-neon-vcmla-ranges.c
new file mode 100644
index 0000000000000..9b42e68670da0
--- /dev/null
+++ b/clang/test/Sema/aarch64-neon-vcmla-ranges.c
@@ -0,0 +1,202 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +v8.3a -ffreestanding -fsyntax-only -verify %s
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+#include <arm_fp16.h>
+
+void test_vcmla_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+ vcmla_lane_f16(a, b, c, 0);
+ vcmla_lane_f16(a, b, c, 1);
+
+ vcmla_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+ vcmla_laneq_f16(a, b, c, 0);
+ vcmla_laneq_f16(a, b, c, 1);
+
+ vcmla_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c){
+ vcmlaq_lane_f16(a, b, c, 0);
+ vcmlaq_lane_f16(a, b, c, 1);
+
+ vcmlaq_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+ vcmlaq_laneq_f16(a, b, c, 0);
+ vcmlaq_laneq_f16(a, b, c, 1);
+ vcmlaq_laneq_f16(a, b, c, 3);
+
+ vcmlaq_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+ vcmla_lane_f32(a, b, c, 0);
+
+ vcmla_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_lane_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+ vcmla_laneq_f32(a, b, c, 0);
+
+ vcmla_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+ vcmlaq_laneq_f32(a, b, c, 0);
+ vcmlaq_laneq_f32(a, b, c, 1);
+
+ vcmlaq_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+ vcmla_rot90_lane_f16(a, b, c, 0);
+ vcmla_rot90_lane_f16(a, b, c, 1);
+
+ vcmla_rot90_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_rot90_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+ vcmla_rot90_laneq_f16(a, b, c, 0);
+ vcmla_rot90_laneq_f16(a, b, c, 1);
+
+ vcmla_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_rot90_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+ vcmlaq_rot90_laneq_f16(a, b, c, 0);
+ vcmlaq_rot90_laneq_f16(a, b, c, 3);
+
+ vcmlaq_rot90_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_rot90_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+ vcmla_rot180_lane_f16(a, b, c, 0);
+ vcmla_rot180_lane_f16(a, b, c, 1);
+
+ vcmla_rot180_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_rot180_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+ vcmla_rot180_laneq_f16(a, b, c, 0);
+ vcmla_rot180_laneq_f16(a, b, c, 1);
+
+ vcmla_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_rot180_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+ vcmlaq_rot180_laneq_f16(a, b, c, 0);
+ vcmlaq_rot180_laneq_f16(a, b, c, 3);
+
+ vcmlaq_rot180_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_rot180_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c){
+ vcmla_rot270_lane_f16(a, b, c, 0);
+ vcmla_rot270_lane_f16(a, b, c, 1);
+
+ vcmla_rot270_lane_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_rot270_lane_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c){
+ vcmla_rot270_laneq_f16(a, b, c, 0);
+ vcmla_rot270_laneq_f16(a, b, c, 1);
+
+ vcmla_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmla_rot270_laneq_f16(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c){
+ vcmlaq_rot270_laneq_f16(a, b, c, 0);
+ vcmlaq_rot270_laneq_f16(a, b, c, 3);
+
+ vcmlaq_rot270_laneq_f16(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_rot270_laneq_f16(a, b, c, 4); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+ vcmla_rot90_lane_f32(a, b, c, 0);
+
+ vcmla_rot90_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_rot90_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot90_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+ vcmla_rot90_laneq_f32(a, b, c, 0);
+ vcmla_rot90_laneq_f32(a, b, c, 1);
+
+ vcmla_rot90_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_rot90_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot90_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+ vcmlaq_rot90_laneq_f32(a, b, c, 0);
+ vcmlaq_rot90_laneq_f32(a, b, c, 1);
+
+ vcmlaq_rot90_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_rot90_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+ vcmla_rot180_lane_f32(a, b, c, 0);
+
+ vcmla_rot180_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_rot180_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot180_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+ vcmla_rot180_laneq_f32(a, b, c, 0);
+ vcmla_rot180_laneq_f32(a, b, c, 1);
+
+ vcmla_rot180_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_rot180_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot180_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+  vcmlaq_rot180_laneq_f32(a, b, c, 0);
+  vcmlaq_rot180_laneq_f32(a, b, c, 1);
+
+  vcmlaq_rot180_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+  vcmlaq_rot180_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c){
+ vcmla_rot270_lane_f32(a, b, c, 0);
+
+ vcmla_rot270_lane_f32(a, b, c, 1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_rot270_lane_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmla_rot270_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t c){
+ vcmla_rot270_laneq_f32(a, b, c, 0);
+ vcmla_rot270_laneq_f32(a, b, c, 1);
+
+ vcmla_rot270_laneq_f32(a, b, c, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ vcmla_rot270_laneq_f32(a, b, c, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
+void test_vcmlaq_rot270_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t c){
+ vcmlaq_rot270_laneq_f32(a, b, c, 0);
+ vcmlaq_rot270_laneq_f32(a, b, c, 1);
+
+ vcmlaq_rot270_laneq_f32(a, b, c, 2); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+ vcmlaq_rot270_laneq_f32(a, b, c, -1); // expected-error-re +{{argument value {{.*}} is outside the valid range}}
+}
\ No newline at end of file
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 30fbb8c5d65e5..7666b53000edc 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -333,6 +333,8 @@ class Intrinsic {
/// The types of return value [0] and parameters [1..].
std::vector<Type> Types;
+
+ SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
/// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
int PolymorphicKeyType;
/// The local variables defined.
@@ -368,9 +370,9 @@ class Intrinsic {
public:
Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
- TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+ TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
- : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
+ : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks), CK(CK), Body(Body),
ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
@@ -414,23 +416,22 @@ class Intrinsic {
/// Get the architectural guard string (#ifdef).
std::string getArchGuard() const { return ArchGuard; }
std::string getTargetGuard() const { return TargetGuard; }
+ ArrayRef<std::tuple<int, int, int>> getImmChecks() const {return ImmChecks; }
/// Get the non-mangled name.
std::string getName() const { return Name; }
/// Return true if the intrinsic takes an immediate operand.
bool hasImmediate() const {
return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
+ //return !ImmChecks.empty();
}
- /// Return the parameter index of the immediate operand.
- unsigned getImmediateIdx() const {
- for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
- if (Types[Idx].isImmediate())
- return Idx - 1;
- llvm_unreachable("Intrinsic has no immediate");
+ // Return if the supplied argument is an immediate
+ bool isArgImmediate(unsigned idx) const {
+ assert((idx + 1) < Types.size() && "Argument type index out of range!");
+ return Types[idx + 1].isImmediate();
}
-
unsigned getNumParams() const { return Types.size() - 1; }
Type getReturnType() const { return Types[0]; }
Type getParamType(unsigned I) const { return Types[I + 1]; }
@@ -554,9 +555,9 @@ class NeonEmitter {
SmallVectorImpl<Intrinsic *> &Defs);
void genOverloadTypeCheckCode(raw_ostream &OS,
SmallVectorImpl<Intrinsic *> &Defs);
+ void genNeonImmCheckTypes(raw_ostream &OS);
void genIntrinsicRangeCheckCode(raw_ostream &OS,
SmallVectorImpl<Intrinsic *> &Defs);
-
public:
/// Called by Intrinsic - this attempts to get an intrinsic that takes
/// the given types as arguments.
@@ -1031,7 +1032,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
if (LocalCK == ClassI && T.isInteger())
T.makeSigned();
- if (hasImmediate() && getImmediateIdx() == I)
+ if(isArgImmediate(I))
T.makeImmediate(32);
S += T.builtin_str();
@@ -1952,6 +1953,16 @@ void NeonEmitter::createIntrinsic(Record *R,
bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
+ std::vector<Record*> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+
+ SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
+  for(const auto *ImmCheckRec : ImmCheckList) {
+
+    ImmChecks.push_back(std::make_tuple(ImmCheckRec->getValueAsInt("Arg"),
+                        ImmCheckRec->getValueAsDef("Kind")->getValueAsInt("Value"),
+                        ImmCheckRec->getValueAsInt("EltSizeArg")));
+  }
+
bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
@@ -1992,7 +2003,7 @@ void NeonEmitter::createIntrinsic(Record *R,
auto &Entry = IntrinsicMap[Name];
for (auto &I : NewTypeSpecs) {
- Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
+ Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body, *this,
ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
Out.push_back(&Entry.back());
}
@@ -2142,84 +2153,40 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
OS << "#endif\n\n";
}
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
- SmallVectorImpl<Intrinsic *> &Defs) {
- OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
+ OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
+
+ for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
+ OS << " " << RV->getNameInitAsString() << " = " << RV->getValueAsInt("Value") << ",\n";
+ }
+ OS << "#endif\n\n";
+}
+
+void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
+ OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
+ int EltType;
+ // Ensure these are only emitted once.
std::set<std::string> Emitted;
- for (auto *Def : Defs) {
- if (Def->hasBody())
- continue;
- // Functions which do not have an immediate do not need to have range
- // checking code emitted.
- if (!Def->hasImmediate())
- continue;
- if (Emitted.find(Def->getMangledName()) != Emitted.end())
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end() || !Def->hasImmediate())
continue;
- std::string LowerBound, UpperBound;
-
- Record *R = Def->getRecord();
- if (R->getValueAsBit("isVXAR")) {
- //VXAR takes an immediate in the range [0, 63]
- LowerBound = "0";
- UpperBound = "63";
- } else if (R->getValueAsBit("isVCVT_N")) {
- // VCVT between floating- and fixed-point values takes an immediate
- // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
- LowerBound = "1";
- if (Def->getBaseType().getElementSizeInBits() == 16 ||
- Def->getName().find('h') != std::string::npos)
- // VCVTh operating on FP16 intrinsics in range [1, 16)
- UpperBound = "15";
- else if (Def->getBaseType().getElementSizeInBits() == 32)
- UpperBound = "31";
- else
- UpperBound = "63";
- } else if (R->getValueAsBit("isScalarShift")) {
- // Right shifts have an 'r' in the name, left shifts do not. Convert
- // instructions have the same bounds and right shifts.
- if (Def->getName().find('r') != std::string::npos ||
- Def->getName().find("cvt") != std::string::npos)
- LowerBound = "1";
-
- UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
- } else if (R->getValueAsBit("isShift")) {
- // Builtins which are overloaded by type will need to have their upper
- // bound computed at Sema time based on the type constant.
-
- // Right shifts have an 'r' in the name, left shifts do not.
- if (Def->getName().find('r') != std::string::npos)
- LowerBound = "1";
- UpperBound = "RFT(TV, true)";
- } else if (Def->getClassKind(true) == ClassB) {
- // ClassB intrinsics have a type (and hence lane number) that is only
- // known at runtime.
- if (R->getValueAsBit("isLaneQ"))
- UpperBound = "RFT(TV, false, true)";
- else
- UpperBound = "RFT(TV, false, false)";
- } else {
- // The immediate generally refers to a lane in the preceding argument.
- assert(Def->getImmediateIdx() > 0);
- Type T = Def->getParamType(Def->getImmediateIdx() - 1);
- UpperBound = utostr(T.getNumElements() - 1);
- }
+ // If the Def has a body (operation DAGs), it is not a __builtin_neon_
+ if(Def->hasBody()) continue;
- // Calculate the index of the immediate that should be range checked.
- unsigned Idx = Def->getNumParams();
- if (Def->hasImmediate())
- Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
-
- OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
- << "i = " << Idx << ";";
- if (!LowerBound.empty())
- OS << " l = " << LowerBound << ";";
- if (!UpperBound.empty())
- OS << " u = " << UpperBound << ";";
- OS << " break;\n";
+ OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
+
+    for(const auto &Check: Def->getImmChecks()){
+      EltType = std::get<2>(Check); // elt type argument
+      if(EltType >= 0)
+        EltType = Def->getParamType(EltType).getNeonEnum();
+      OS << "  ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check) <<
+        ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
+    }
+    OS << "  break;\n";
Emitted.insert(Def->getMangledName());
}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index caedd5978a87c..027aa4b4c6bb2 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1573,7 +1573,7 @@ void SVEEmitter::createTypeFlags(raw_ostream &OS) {
OS << " " << KV.getKey() << " = " << KV.getValue() << ",\n";
OS << "#endif\n\n";
- OS << "#ifdef LLVM_GET_SVE_IMMCHECKTYPES\n";
+ OS << "#ifdef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES\n";
for (auto &KV : ImmCheckTypes)
OS << " " << KV.getKey() << " = " << KV.getValue() << ",\n";
OS << "#endif\n\n";
>From 1d9084e9c246d2fcb395c329c6cf1ca19ef032aa Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 23 Jul 2024 22:28:48 +0000
Subject: [PATCH 2/3] Updated/consistent vcmla codegen tests
---
clang/test/CodeGen/aarch64-neon-vcmla.c | 610 +++++++++++++++---------
1 file changed, 384 insertions(+), 226 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-neon-vcmla.c b/clang/test/CodeGen/aarch64-neon-vcmla.c
index 2ff48fd97b427..d82d74d019c01 100644
--- a/clang/test/CodeGen/aarch64-neon-vcmla.c
+++ b/clang/test/CodeGen/aarch64-neon-vcmla.c
@@ -1,438 +1,596 @@
-// RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon \
-// RUN: -target-feature +v8.3a \
-// RUN: -target-feature +fullfp16 \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon \
+// RUN: -target-feature +v8.3a -target-feature +fullfp16 \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -O1 | FileCheck %s
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
-// CHECK-LABEL: @test_vcmla_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]]
+//
float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]]
+//
float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]]
+//
float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]]
+//
float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT: ret <2 x double> [[VCMLAQ_F643_I]]
+//
float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_f64(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_rot90_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot90_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_rot90_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot90_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot90_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot90_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT90_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT90_F643_I]]
+//
float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_rot90_f64(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_rot180_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot180_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_rot180_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot180_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot180_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot180_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT180_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT180_F643_I]]
+//
float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_rot180_f64(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_rot270_f16(
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %rhs)
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot270_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_rot270_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot270_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_f16(
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> %rhs)
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot270_f16(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_f32(
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> %rhs)
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot270_f32(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_f64(
-// CHECK: [[RES:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %acc, <2 x double> %lhs, <2 x double> %rhs)
-// CHECK: ret <2 x double> [[RES]]
+// CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64(
+// CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLAQ_ROT270_F643_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]])
+// CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT270_F643_I]]
+//
float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) {
return vcmlaq_rot270_f64(acc, lhs, rhs);
}
-// CHECK-LABEL: @test_vcmla_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]]
+//
float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmla_laneq_f16(
-// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: %vcmla_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half>
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLA_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]]
+//
float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_laneq_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]]
+//
float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]]
+//
float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_laneq_f16(acc, lhs, rhs, 3);
}
-// CHECK-LABEL: @test_vcmla_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]]
+//
float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmla_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]]
+//
float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]]
+//
float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmlaq_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]]
+//
float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmla_rot90_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot90_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: %vcmla_rot90_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK: ret <4 x half> %vcmla_rot90_f163.i
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]]
+//
float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_rot90_laneq_f16(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]]
+//
float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3);
}
-// CHECK-LABEL: @test_vcmla_rot90_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot90_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]]
+//
float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]]
+//
float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmla_rot180_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot180_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
-// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
-// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: %vcmla_rot180_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK: ret <4 x half> %vcmla_rot180_f163.i
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 poison, i32 1>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]]
+//
float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_rot180_laneq_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]]
+//
float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3);
}
-// CHECK-LABEL: @test_vcmla_rot180_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot180_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]]
+//
float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]]
+//
float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmla_rot270_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP]])
-// CHECK: ret <4 x half> [[RES]]
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP0]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) {
return vcmla_rot270_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
-// CHECK: %0 = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
-// CHECK: %2 = bitcast <2 x i32> %1 to <4 x half>
-// CHECK: %3 = shufflevector <4 x half> %2, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: %vcmla_rot270_f163.i = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> %3)
-// CHECK: ret <4 x half> %vcmla_rot270_f163.
+// CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16(
+// CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 poison>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half>
+// CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP3]])
+// CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]]
+//
float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) {
return vcmla_rot270_laneq_f16(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_lane_f16(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x half> %rhs, <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[RHS]], <4 x half> poison, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP0]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) {
return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f16(
-// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
-// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
-// CHECK: ret <8 x half> [[RES]]
+// CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16(
+// CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[RHS]] to <4 x i32>
+// CHECK-NEXT: [[VECINIT15:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT15]] to <8 x half>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]]
+//
float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) {
return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3);
}
-// CHECK-LABEL: @test_vcmla_rot270_lane_f32(
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> %rhs)
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) {
return vcmla_rot270_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
-// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
-// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
-// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
-// CHECK: ret <2 x float> [[RES]]
+// CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32(
+// CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[RHS]] to <2 x i64>
+// CHECK-NEXT: [[VECINIT:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <1 x i32> <i32 1>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[VECINIT]] to <2 x float>
+// CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]])
+// CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]]
+//
float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) {
return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_lane_f32(
-// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
-// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
-// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[DUP]] to <4 x float>
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[RHS]] to i64
+// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) {
return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0);
}
-// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f32(
-// CHECK: [[DUP:%.*]] = shufflevector <4 x float> %rhs, <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
-// CHECK: ret <4 x float> [[RES]]
+// CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32(
+// CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[RHS]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+// CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP0]])
+// CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]]
+//
float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) {
return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1);
}
>From 53216bc2003e03dfc69732f0a9f2753687c4a6ae Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 24 Jul 2024 11:33:27 +0000
Subject: [PATCH 3/3] Run clang-format
---
clang/include/clang/Basic/TargetBuiltins.h | 51 ++++++------
clang/include/clang/Sema/SemaARM.h | 4 +-
clang/lib/Sema/SemaARM.cpp | 93 +++++++++++-----------
clang/utils/TableGen/NeonEmitter.cpp | 54 ++++++++-----
4 files changed, 106 insertions(+), 96 deletions(-)
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index 50e17ad7e1628..384811f9281ac 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -209,7 +209,7 @@ namespace clang {
Flags |= QuadFlag;
}
- EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
+ EltType getEltType() const { return (EltType)(Flags & EltTypeMask); }
bool isPoly() const {
EltType ET = getEltType();
return ET == Poly8 || ET == Poly16 || ET == Poly64;
@@ -217,36 +217,36 @@ namespace clang {
bool isUnsigned() const { return (Flags & UnsignedFlag) != 0; }
bool isQuad() const { return (Flags & QuadFlag) != 0; };
unsigned getEltSizeInBits() const {
- switch(getEltType()){
- case Int8:
- case Poly8:
- return 8;
- case Int16:
- case Float16:
- case Poly16:
- case BFloat16:
- return 16;
- case Int32:
- case Float32:
- return 32;
- case Int64:
- case Float64:
- case Poly64:
- return 64;
- case Poly128:
- return 128;
- default:
- llvm_unreachable("Invalid NeonTypeFlag!");
+ switch (getEltType()) {
+ case Int8:
+ case Poly8:
+ return 8;
+ case Int16:
+ case Float16:
+ case Poly16:
+ case BFloat16:
+ return 16;
+ case Int32:
+ case Float32:
+ return 32;
+ case Int64:
+ case Float64:
+ case Poly64:
+ return 64;
+ case Poly128:
+ return 128;
+ default:
+ llvm_unreachable("Invalid NeonTypeFlag!");
}
}
};
- // Shared between SVE/SME and NEON
- enum ArmImmCheckType {
+ // Shared between SVE/SME and NEON
+ enum ArmImmCheckType {
#define LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
#include "clang/Basic/arm_sve_typeflags.inc"
-#undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
- };
+#undef LLVM_GET_ARM_INTRIN_IMMCHECKTYPES
+ };
/// Flags to identify the types for overloaded SVE builtins.
class SVETypeFlags {
@@ -279,7 +279,6 @@ namespace clang {
#undef LLVM_GET_SVE_MERGETYPES
};
-
SVETypeFlags(uint64_t F) : Flags(F) {
EltTypeShift = llvm::countr_zero(EltTypeMask);
MemEltTypeShift = llvm::countr_zero(MemEltTypeMask);
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 1ced84300c179..2f13e60f081c5 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -41,8 +41,8 @@ class SemaARM : public SemaBase {
unsigned MaxWidth);
bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
- bool ParseNeonImmChecks(CallExpr *TheCall,
- SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+ bool ParseNeonImmChecks(CallExpr *TheCall,
+ SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
int OverloadType);
bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 8f4d94e1df678..bbcca1c72465a 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -369,62 +369,63 @@ enum ArmSMEState : unsigned {
ArmZT0Mask = 0b11 << 2
};
-bool SemaARM::ParseNeonImmChecks(CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2>
- &ImmChecks, int OverloadType = -1) {
+bool SemaARM::ParseNeonImmChecks(
+ CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 2> &ImmChecks,
+ int OverloadType = -1) {
int ArgIdx, CheckTy, ElementType;
bool hasError = false;
for (auto &I : ImmChecks) {
std::tie(ArgIdx, CheckTy, ElementType) = I;
- NeonTypeFlags Type = (OverloadType != -1) ?
- NeonTypeFlags(OverloadType) : NeonTypeFlags(ElementType);
-
- switch((ArmImmCheckType)CheckTy) {
- case ArmImmCheckType::ImmCheck0_3:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
- break;
- case ArmImmCheckType::ImmCheck0_63:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
- break;
- case ArmImmCheckType::ImmCheck0_7:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
- break;
- case ArmImmCheckType::ImmCheck1_16:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
- break;
- case ArmImmCheckType::ImmCheck1_32:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
- break;
- case ArmImmCheckType::ImmCheck1_64:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
- break;
- case ArmImmCheckType::ImmCheckLaneIndex:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, (64 << Type.isQuad()) /
- Type.getEltSizeInBits() - 1);
- break;
- case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
- (128/Type.getEltSizeInBits()) - 1);
- break;
- case ArmImmCheckType::ImmCheckShiftLeft:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
- Type.getEltSizeInBits() - 1);
- break;
- case ArmImmCheckType::ImmCheckShiftRight:
- hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx,
- 1, Type.getEltSizeInBits());
- break;
- default:
- llvm_unreachable("Invalid Neon immediate range typeflag!");
- break;
+ NeonTypeFlags Type = (OverloadType != -1) ? NeonTypeFlags(OverloadType)
+ : NeonTypeFlags(ElementType);
+
+ switch ((ArmImmCheckType)CheckTy) {
+ case ArmImmCheckType::ImmCheck0_3:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 3);
+ break;
+ case ArmImmCheckType::ImmCheck0_63:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 63);
+ break;
+ case ArmImmCheckType::ImmCheck0_7:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0, 7);
+ break;
+ case ArmImmCheckType::ImmCheck1_16:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 16);
+ break;
+ case ArmImmCheckType::ImmCheck1_32:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 32);
+ break;
+ case ArmImmCheckType::ImmCheck1_64:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1, 64);
+ break;
+ case ArmImmCheckType::ImmCheckLaneIndex:
+ hasError |= SemaRef.BuiltinConstantArgRange(
+ TheCall, ArgIdx, 0,
+ (64 << Type.isQuad()) / Type.getEltSizeInBits() - 1);
+ break;
+ case ArmImmCheckType::ImmCheckLaneQIndex: // force to use quad
+ hasError |= SemaRef.BuiltinConstantArgRange(
+ TheCall, ArgIdx, 0, (128 / Type.getEltSizeInBits()) - 1);
+ break;
+ case ArmImmCheckType::ImmCheckShiftLeft:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 0,
+ Type.getEltSizeInBits() - 1);
+ break;
+ case ArmImmCheckType::ImmCheckShiftRight:
+ hasError |= SemaRef.BuiltinConstantArgRange(TheCall, ArgIdx, 1,
+ Type.getEltSizeInBits());
+ break;
+ default:
+ llvm_unreachable("Invalid Neon immediate range typeflag!");
+ break;
}
}
return hasError;
}
-
bool SemaARM::ParseSVEImmChecks(
CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
// Perform all the immediate checks for this builtin call.
@@ -833,8 +834,8 @@ bool SemaARM::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
#include "clang/Basic/arm_fp16.inc"
#include "clang/Basic/arm_neon.inc"
#undef GET_NEON_IMMEDIATE_CHECK
- }
-
+ }
+
return ParseNeonImmChecks(TheCall, ImmChecks, TV);
}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 7666b53000edc..8dc6312525cf5 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -370,10 +370,13 @@ class Intrinsic {
public:
Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
- TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
- StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable, bool BigEndianSafe)
- : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks), CK(CK), Body(Body),
- ArchGuard(ArchGuard.str()), TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
+ TypeSpec InTS, ArrayRef<std::tuple<int, int, int>> ImmChecks,
+ ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
+ StringRef ArchGuard, StringRef TargetGuard, bool IsUnavailable,
+ bool BigEndianSafe)
+ : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), ImmChecks(ImmChecks),
+ CK(CK), Body(Body), ArchGuard(ArchGuard.str()),
+ TargetGuard(TargetGuard.str()), IsUnavailable(IsUnavailable),
BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
Emitter(Emitter) {
@@ -416,14 +419,14 @@ class Intrinsic {
/// Get the architectural guard string (#ifdef).
std::string getArchGuard() const { return ArchGuard; }
std::string getTargetGuard() const { return TargetGuard; }
- ArrayRef<std::tuple<int, int, int>> getImmChecks() const {return ImmChecks; }
+ ArrayRef<std::tuple<int, int, int>> getImmChecks() const { return ImmChecks; }
/// Get the non-mangled name.
std::string getName() const { return Name; }
/// Return true if the intrinsic takes an immediate operand.
bool hasImmediate() const {
return llvm::any_of(Types, [](const Type &T) { return T.isImmediate(); });
- //return !ImmChecks.empty();
+ // return !ImmChecks.empty();
}
// Return if the supplied argument is an immediate
@@ -558,6 +561,7 @@ class NeonEmitter {
void genNeonImmCheckTypes(raw_ostream &OS);
void genIntrinsicRangeCheckCode(raw_ostream &OS,
SmallVectorImpl<Intrinsic *> &Defs);
+
public:
/// Called by Intrinsic - this attempts to get an intrinsic that takes
/// the given types as arguments.
@@ -1032,7 +1036,7 @@ std::string Intrinsic::getBuiltinTypeStr() {
if (LocalCK == ClassI && T.isInteger())
T.makeSigned();
- if(isArgImmediate(I))
+ if (isArgImmediate(I))
T.makeImmediate(32);
S += T.builtin_str();
@@ -1953,12 +1957,13 @@ void NeonEmitter::createIntrinsic(Record *R,
bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
std::string ArchGuard = std::string(R->getValueAsString("ArchGuard"));
std::string TargetGuard = std::string(R->getValueAsString("TargetGuard"));
- std::vector<Record*> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
+ std::vector<Record *> ImmCheckList = R->getValueAsListOfDefs("ImmChecks");
SmallVector<std::tuple<int, int, int>, 2> ImmChecks;
- for(const auto *R: ImmCheckList) {
+ for (const auto *R : ImmCheckList) {
- ImmChecks.push_back(std::make_tuple(R->getValueAsInt("Arg"),
+ ImmChecks.push_back(
+ std::make_tuple(R->getValueAsInt("Arg"),
R->getValueAsDef("Kind")->getValueAsInt("Value"),
R->getValueAsInt("EltSizeArg")));
}
@@ -2003,8 +2008,9 @@ void NeonEmitter::createIntrinsic(Record *R,
auto &Entry = IntrinsicMap[Name];
for (auto &I : NewTypeSpecs) {
- Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body, *this,
- ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe);
+ Entry.emplace_back(R, Name, Proto, I.first, I.second, ImmChecks, CK, Body,
+ *this, ArchGuard, TargetGuard, IsUnavailable,
+ BigEndianSafe);
Out.push_back(&Entry.back());
}
@@ -2157,34 +2163,38 @@ void NeonEmitter::genNeonImmCheckTypes(raw_ostream &OS) {
OS << "#ifdef GET_NEON_IMMCHECKTYPES\n";
for (auto *RV : Records.getAllDerivedDefinitions("ImmCheckType")) {
- OS << " " << RV->getNameInitAsString() << " = " << RV->getValueAsInt("Value") << ",\n";
+ OS << " " << RV->getNameInitAsString() << " = "
+ << RV->getValueAsInt("Value") << ",\n";
}
OS << "#endif\n\n";
}
-void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
+void NeonEmitter::genIntrinsicRangeCheckCode(
+ raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs) {
OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
int EltType;
// Ensure these are only emitted once.
std::set<std::string> Emitted;
for (auto &Def : Defs) {
- if (Emitted.find(Def->getMangledName()) != Emitted.end() || !Def->hasImmediate())
+ if (Emitted.find(Def->getMangledName()) != Emitted.end() ||
+ !Def->hasImmediate())
continue;
// If the Def has a body (operation DAGs), it is not a __builtin_neon_
- if(Def->hasBody()) continue;
+ if (Def->hasBody())
+ continue;
OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ":\n";
-
- for(const auto &Check: Def->getImmChecks()){
- EltType = std::get<2>(Check); // elt type argument
- if(EltType >= 0)
+
+ for (const auto &Check : Def->getImmChecks()) {
+ EltType = std::get<2>(Check); // elt type argument
+ if (EltType >= 0)
EltType = Def->getParamType(EltType).getNeonEnum();
- OS << " ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check) <<
- ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
+ OS << " ImmChecks.push_back(std::make_tuple(" << std::get<0>(Check)
+ << ", " << std::get<1>(Check) << ", " << EltType << ")); \n";
OS << " break;\n";
}
Emitted.insert(Def->getMangledName());
More information about the cfe-commits
mailing list